Compare commits
177 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e54f3cde94 | |||
| 77459d06d6 | |||
| 3202e555ab | |||
| 978052b5a2 | |||
| 19931208a9 | |||
| c39787ad96 | |||
| b5b6cdddb3 | |||
| a4123ace71 | |||
| 3bb48f2147 | |||
| c2c8f7e424 | |||
| b70c1b7c37 | |||
| 9c33582412 | |||
| 23d977e26b | |||
| 88b83d4daf | |||
| 98d616d82b | |||
| 59b7006e5a | |||
| 2d2cb2a244 | |||
| 02c9fdb18e | |||
| 3ba90f49cf | |||
| 009083882a | |||
| a98076196b | |||
| afe5a98474 | |||
| 80f2e2f619 | |||
| 897e9464a7 | |||
| c160bb8291 | |||
| a2332fb13d | |||
| 90c3fe16b5 | |||
| e0d9816c99 | |||
| fbbd0957bd | |||
| 2805256c33 | |||
| cefacb87af | |||
| d0575d286f | |||
| 80bf1993e0 | |||
| 3c6e2a2acc | |||
| dbf7b9b587 | |||
| 5cba0504df | |||
| 77d6bc5551 | |||
| d196ad1cab | |||
| b71771e52e | |||
| 256bb0607d | |||
| ff9a66fb72 | |||
| 363c76d274 | |||
| dfb2c6dfdb | |||
| 16d6ad4122 | |||
| 3856bb3a4f | |||
| 0b962b41fa | |||
| b6c400902e | |||
| 98f67e75d9 | |||
| f652e2d4ed | |||
| ecae5bc7f1 | |||
| 23a6f02ec2 | |||
| 4a7412e4f2 | |||
| 0cb224a7f1 | |||
| d44f3672be | |||
| c98500c303 | |||
| 4efbfa45c4 | |||
| 86a783e72f | |||
| 1054facffa | |||
| 18f5d0cb05 | |||
| a2403eaed9 | |||
| 1a9439d013 | |||
| c737e1ad7d | |||
| 9c02c2c4a2 | |||
| c4e9ca8f4d | |||
| aa99111a87 | |||
| 0b0d262462 | |||
| 07e392913f | |||
| d51bcd77c7 | |||
| b6cfc0a503 | |||
| 1e1689f1f2 | |||
| 78f0ffa9de | |||
| 50d88d611d | |||
| cfafa31ea2 | |||
| ffff9bb592 | |||
| a0f72fc39b | |||
| 5fde7690a5 | |||
| 66be23f0c4 | |||
| caa9b8b609 | |||
| f78e03bd0a | |||
| 5412864705 | |||
| 0da093c046 | |||
| 3199d0d90e | |||
| 4bfd552da7 | |||
| cb18eac7ec | |||
| bea8559f78 | |||
| 81f8b56b48 | |||
| db2efe9f52 | |||
| 77de7e794c | |||
| fb4e14d9b9 | |||
| 07916df330 | |||
| 5e735e9e56 | |||
| 24fdde89c6 | |||
| f3d3255de1 | |||
| fe21c2f487 | |||
| e4695cf289 | |||
| d72dcbacfb | |||
| 4ad681741d | |||
| 88ca2b0b03 | |||
| 8a51db92ed | |||
| 16371f2909 | |||
| c7339e68df | |||
| 06efb9e61b | |||
| aaacec087c | |||
| ed64d92904 | |||
| 6ccc6c87c1 | |||
| 623d80b6c8 | |||
| 7eb7f61483 | |||
| 8c893ca783 | |||
| d1383227b2 | |||
| a5687bbc65 | |||
| da466b3821 | |||
| eca8ec43c5 | |||
| 37c9b8e773 | |||
| 50ae9e94d1 | |||
| 429ac957c1 | |||
| 9312ad18ef | |||
| 2063615d37 | |||
| 4d225f73a8 | |||
| c13aa9183a | |||
| 662aec209a | |||
| 8440ddfecb | |||
| 0ce4794767 | |||
| 8674b2cd9a | |||
| 80862e7073 | |||
| a8c61eb320 | |||
| 8f89fbf8a7 | |||
| 33790bb5e7 | |||
| 7287e989a6 | |||
| 63fe2d496e | |||
| 4e8eb2dc0e | |||
| 78aeedafae | |||
| 2e6eee6ba1 | |||
| f23ae32077 | |||
| 739a477d3f | |||
| 8609b696c9 | |||
| 207fc9cb56 | |||
| fdaf547b06 | |||
| fa536f9714 | |||
| cba066f49b | |||
| 75f7bd8de4 | |||
| f85fff4398 | |||
| 3bcffaf52c | |||
| 3a19affb67 | |||
| 2b985ad526 | |||
| 4e761c1363 | |||
| 6673c8052b | |||
| 5e5002c883 | |||
| 9aef5ecf6c | |||
| f6c5f4e0a9 | |||
| c72fd3eb5a | |||
| b0435f9885 | |||
| 2341bda621 | |||
| 4634cc09d0 | |||
| 1607c89459 | |||
| d4df1e01df | |||
| ed31fdc0df | |||
| 5412bf0ba3 | |||
| 8a9d5e7c4d | |||
| 01956ee690 | |||
| e46e74ddbb | |||
| 63d65af41b | |||
| 8937f105ea | |||
| 1584b8fb2f | |||
| 2301fb2122 | |||
| 4aa6aa9812 | |||
| a53d67a35a | |||
| 3259984d1c | |||
| 5e3ed4071b | |||
| c090617afd | |||
| c5ecfa8f6c | |||
| 417bcda68c | |||
| 86d1473a6a | |||
| 9e0a9ccef4 | |||
| 7e1c3668bf | |||
| e5cce9caff | |||
| 67dba5f641 | |||
| db2fd9d8e9 |
@@ -1,4 +1,6 @@
|
|||||||
# Build stage
|
# Build stage
|
||||||
|
# ci-retrigger 2026-06-27: transient registry.meghsakha.com 502 on push (Runde 1) + last-build
|
||||||
|
# tag-bug skipped the rerun (Runde 2). No logic change — forces detect-changes to rebuild ai-sdk.
|
||||||
FROM golang:1.24-alpine AS builder
|
FROM golang:1.24-alpine AS builder
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -33,6 +35,14 @@ COPY migrations/ ./migrations/
|
|||||||
# Copy policy files (YAML rules)
|
# Copy policy files (YAML rules)
|
||||||
COPY policies/ ./policies/
|
COPY policies/ ./policies/
|
||||||
|
|
||||||
|
# Copy Compliance Execution Graph data (file-backed: Registry join-key copy + accepted control
|
||||||
|
# mappings + evidence requirements) consumed by GET /sdk/v1/compliance/obligation-status.
|
||||||
|
# data/obligations/obligation_join_keys.json is a synced copy of the repo-root Registry contract
|
||||||
|
# (the Obligation Registry owns the canonical file) — re-sync it when the Registry grows.
|
||||||
|
COPY data/control_mappings/ ./data/control_mappings/
|
||||||
|
COPY data/evidence_requirements/ ./data/evidence_requirements/
|
||||||
|
COPY data/obligations/ ./data/obligations/
|
||||||
|
|
||||||
# Create non-root user
|
# Create non-root user
|
||||||
RUN adduser -D -u 1000 appuser
|
RUN adduser -D -u 1000 appuser
|
||||||
USER appuser
|
USER appuser
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ func main() {
|
|||||||
cmdEcho(os.Args[2:])
|
cmdEcho(os.Args[2:])
|
||||||
case "hierarchy":
|
case "hierarchy":
|
||||||
cmdHierarchy(os.Args[2:])
|
cmdHierarchy(os.Args[2:])
|
||||||
|
case "propose":
|
||||||
|
cmdPropose(os.Args[2:])
|
||||||
default:
|
default:
|
||||||
usage()
|
usage()
|
||||||
os.Exit(2)
|
os.Exit(2)
|
||||||
@@ -41,7 +43,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
|
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cmdReachability(_ []string) {
|
func cmdReachability(_ []string) {
|
||||||
|
|||||||
@@ -0,0 +1,188 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
|
||||||
|
|
||||||
|
// narrativeInput is the JSON document read from the first argument of
// "iace-audit propose".
type narrativeInput struct {
	// MachineType is decoded from "machine_type". cmdPropose passes it to the
	// proposer input builder, to the judge selection and to the report renderers.
	MachineType string `json:"machine_type"`

	// Narrative is decoded from "narrative"; cmdPropose rejects an empty value.
	Narrative string `json:"narrative"`

	// MachineTypes is decoded from the optional "machine_types" array and is
	// left out when encoding an empty list.
	MachineTypes []string `json:"machine_types,omitempty"`
}
|
||||||
|
|
||||||
|
// cmdPropose — Method P: offline dedup-candidate proposer.
|
||||||
|
//
|
||||||
|
// iace-audit propose <narrative.json> [<ground-truth.json>]
|
||||||
|
//
|
||||||
|
// Detect near-duplicate patterns, screen survivors against a ground truth (if
|
||||||
|
// given), judge them (heuristic by default, LLM when enabled), and write the
|
||||||
|
// human-review queue to audit-reports/proposals.{md,json}. Propose-only — it
|
||||||
|
// writes a report and never mutates the pattern library.
|
||||||
|
//
|
||||||
|
// Env:
|
||||||
|
//
|
||||||
|
// IACE_PROPOSE_THRESHOLD candidate score threshold (default 0.30)
|
||||||
|
// IACE_PROPOSE_LLM=1 use the offline LLM judge instead of the heuristic
|
||||||
|
// OLLAMA_URL ollama base URL (default http://localhost:11434)
|
||||||
|
// SELF_HOSTED_LLM_MODEL model name (default qwen2.5:32b-instruct)
|
||||||
|
func cmdPropose(args []string) {
|
||||||
|
if len(args) < 1 {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var in narrativeInput
|
||||||
|
must(readJSONFile(args[0], &in))
|
||||||
|
if in.Narrative == "" {
|
||||||
|
fmt.Fprintln(os.Stderr, "propose: narrative is empty")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
var gt *iace.GroundTruth
|
||||||
|
if len(args) >= 2 {
|
||||||
|
var g iace.GroundTruth
|
||||||
|
must(readJSONFile(args[1], &g))
|
||||||
|
gt = &g
|
||||||
|
}
|
||||||
|
|
||||||
|
threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
|
||||||
|
hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
|
||||||
|
candidates := iace.FindDedupCandidates(fired, threshold)
|
||||||
|
|
||||||
|
byID := make(map[string]iace.PatternMatch, len(fired))
|
||||||
|
for _, pm := range fired {
|
||||||
|
byID[pm.PatternID] = pm
|
||||||
|
}
|
||||||
|
|
||||||
|
judge := selectJudge(in.MachineType)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
var proposals []iace.JudgedProposal
|
||||||
|
blocked := 0
|
||||||
|
for _, c := range candidates {
|
||||||
|
var sr iace.ScreenResult
|
||||||
|
if gt != nil {
|
||||||
|
sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
|
||||||
|
if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
|
||||||
|
blocked++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
|
||||||
|
proposals = append(proposals, iace.JudgedProposal{
|
||||||
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
|
||||||
|
writeJSON("audit-reports/proposals.json", proposals)
|
||||||
|
|
||||||
|
// Type 2: foreign-framing candidates (zone terms with no narrative echo).
|
||||||
|
framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
|
||||||
|
writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
|
||||||
|
writeJSON("audit-reports/framing.json", framing)
|
||||||
|
|
||||||
|
// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
|
||||||
|
// names as a whole word, with a dominant shared required tag).
|
||||||
|
vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
|
||||||
|
var vgaps []audit.DictionarySuggestion
|
||||||
|
for _, s := range vocab.SuggestedDictionaryEntries {
|
||||||
|
if len(s.SuggestedTags) > 0 {
|
||||||
|
vgaps = append(vgaps, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
|
||||||
|
writeJSON("audit-reports/vocab.json", vgaps)
|
||||||
|
|
||||||
|
// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
|
||||||
|
gaps := iace.FindCoverageGaps(hazards)
|
||||||
|
var missing []iace.MissingHazard
|
||||||
|
if lj, ok := judge.(iace.LLMJudge); ok {
|
||||||
|
missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
|
||||||
|
}
|
||||||
|
writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
|
||||||
|
writeJSON("audit-reports/coverage.json", gaps)
|
||||||
|
|
||||||
|
printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
|
||||||
|
"fired_patterns": len(fired),
|
||||||
|
"candidates": len(candidates),
|
||||||
|
"in_queue": len(proposals),
|
||||||
|
"gt_blocked": blocked,
|
||||||
|
"framing_flags": len(framing),
|
||||||
|
"vocab_gaps": len(vgaps),
|
||||||
|
"coverage_gaps": len(gaps),
|
||||||
|
})
|
||||||
|
if gt == nil {
|
||||||
|
fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectJudge(machineClass string) iace.CandidateJudge {
|
||||||
|
if os.Getenv("IACE_PROPOSE_LLM") != "1" {
|
||||||
|
return iace.HeuristicJudge{}
|
||||||
|
}
|
||||||
|
base := envStr("OLLAMA_URL", "http://localhost:11434")
|
||||||
|
model := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
|
||||||
|
reg := llm.NewProviderRegistry("ollama", "")
|
||||||
|
reg.Register(llm.NewOllamaAdapter(base, model))
|
||||||
|
fmt.Printf("using LLM judge (ollama %s, model %s)\n", base, model)
|
||||||
|
return iace.LLMJudge{Completer: iace.NewRegistryCompleter(reg, model), MachineClass: machineClass}
|
||||||
|
}
|
||||||
|
|
||||||
|
// readJSONFile reads the file at path and decodes its JSON content into v,
// which must be a pointer accepted by json.Unmarshal.
//
// A read failure is returned unchanged, since the error from os.ReadFile
// already names the file. A decode failure is wrapped with the path, because
// json.Unmarshal errors carry no file name and callers may load more than one
// file. The underlying error stays reachable through errors.Is and errors.As.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("decoding %s: %w", path, err)
	}
	return nil
}
|
||||||
|
|
||||||
|
// writeText writes content to path, creating the parent directory of path
// first, and prints a confirmation line to stdout on success.
//
// It is best-effort: a failure to create the directory or to write the file
// is reported as a warning on stderr and the function returns normally, so
// one unwritable report does not abort the remaining ones.
func writeText(path, content string) {
	// Create the directory the file actually lives in rather than a fixed
	// name, so the helper works for any destination path.
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
		return
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
		return
	}
	fmt.Println("→ wrote", path)
}
|
||||||
|
|
||||||
|
// envStr returns the value of the environment variable key, or def when the
// variable is unset or empty.
func envStr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
|
||||||
|
|
||||||
|
// envFloat returns the environment variable key parsed as a float64.
//
// It returns def when the variable is unset or empty. It also returns def
// when the value does not parse with strconv.ParseFloat, and in that case
// prints a warning to stderr so a mistyped setting (for example "0,3") is not
// silently replaced by the default.
func envFloat(key string, def float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	parsed, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a valid number), using default %g\n", key, raw, def)
		return def
	}
	return parsed
}
|
||||||
|
|
||||||
|
func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Vocab→tag review queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
|
||||||
|
for i, s := range entries {
|
||||||
|
tag := "<tag>"
|
||||||
|
if len(s.SuggestedTags) > 0 {
|
||||||
|
tag = s.SuggestedTags[0]
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "## %d. \"%s\" → suggested tag(s): %s\n", i+1, s.Token, strings.Join(s.SuggestedTags, ", "))
|
||||||
|
fmt.Fprintf(&b, "- named by %d pattern(s): %s\n", len(s.PatternIDs), strings.Join(s.PatternIDs, ", "))
|
||||||
|
fmt.Fprintf(&b, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", s.Token, tag)
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
// Control-Mapping: CRA Annex I -> NIST SP 800-53 Rev. 5. Eine Zeile = ein Mapping (Schema: ControlMapping).
|
||||||
|
// Reviewt 2026-06-25 (benjamin): 3 accepted, mapping_type=primary_implementation (kanonische Primaer-Control je Anforderung).
|
||||||
|
// Heimat der OWASP-Rejects (2)(e)/(2)(l)/(2)(i): dort war OWASP nicht der Zielstandard ("Mapping ueber NIST/BSI erforderlich").
|
||||||
|
// related-Controls (SC-3(3), RA-5, AC-6, SI-16, ...) folgen separat als mapping_type=supports — hier nur der kanonische Einstieg.
|
||||||
|
// obligation_id (Registry-Handoff #4 adoptiert, #6 auf CORE re-pointet 2026-06-26): SI-7->software_integrity_protection (CORE (2)(f)), SI-2->provide_security_updates, CM-7->attack_surface_minimization (CORE (2)(j)). Join exakt. Die domaenen-scoped IDs (signed_update_integrity, remote_access_attack_surface_min) bleiben gueltige Obligations und zeigen per specializes->CORE auf diese Ziele.
|
||||||
|
{"source_norm": "CRA Annex I Part I (2)(e) — Integritaet", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "SI-7", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST SI-7 = Software, Firmware, and Information Integrity — kanonische Integritaetskontrolle (Signaturpruefung, Manipulationserkennung).", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Primaere Implementierung der CRA-Integritaetsanforderung; OWASP war hier kein passender Treffer. Related (spaeter, supports): SA-10, CM-14.", "version": "2026-06-25", "obligation_id": "software_integrity_protection"}
|
||||||
|
{"source_norm": "CRA Annex I Part I (2)(l) — Sichere Updates", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "SI-2", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST SI-2 = Flaw Remediation — kanonische Update-/Patch-Kontrolle.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Primaere Implementierung der CRA-Update-Anforderung. Related (spaeter, supports): RA-5, CM-3, SA-11.", "version": "2026-06-25", "obligation_id": "provide_security_updates"}
|
||||||
|
{"source_norm": "CRA Annex I Part I (2)(i) — Angriffsflaeche minimieren", "source_role": "operational_requirement", "target_framework": "NIST SP 800-53", "target_control": "CM-7", "mapping_type": "primary_implementation", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "NIST CM-7 = Least Functionality — Deaktivierung nicht benoetigter Ports/Dienste/Funktionen.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "CM-7 als Primaer-Control fuer Angriffsflaeche (nicht SC-3(3)). Related (spaeter, supports): SC-3(3), AC-6, SI-16.", "version": "2026-06-25", "obligation_id": "attack_surface_minimization"}
|
||||||
@@ -2,13 +2,13 @@
|
|||||||
// Reviewt 2026-06-25 (benjamin): 7 accepted, 13 rejected. accepted = Audit-Wahrheit (Advisor nutzt acceptedOnly).
|
// Reviewt 2026-06-25 (benjamin): 7 accepted, 13 rejected. accepted = Audit-Wahrheit (Advisor nutzt acceptedOnly).
|
||||||
// rejected bleiben als Audit-Spur ("warum verworfen"). KEIN confidence — kuratiert = fachliche Feststellung.
|
// rejected bleiben als Audit-Spur ("warum verworfen"). KEIN confidence — kuratiert = fachliche Feststellung.
|
||||||
// Architekturbeweis: CRA -> OWASP fuer AppSec/Auth/Crypto/Logging; Ops/Update/Attack-Surface/Integritaet -> NIST/BSI.
|
// Architekturbeweis: CRA -> OWASP fuer AppSec/Auth/Crypto/Logging; Ops/Update/Attack-Surface/Integritaet -> NIST/BSI.
|
||||||
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.3.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.3.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25", "obligation_id": "user_authentication_required"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V6.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V6 = Authentication.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V6 = Authentication, sauberer Treffer fuer Zugriffsschutz/Authentisierung.", "version": "2026-06-25", "obligation_id": "user_authentication_required"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.2.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11 = Cryptography.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11 = Cryptography, richtiger Bereich fuer Verschluesselung.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.2.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11 = Cryptography.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11 = Cryptography, richtiger Bereich fuer Verschluesselung.", "version": "2026-06-25", "obligation_id": "credential_confidentiality_protection"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.7.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11.7 = Key Management.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11.7 = Key Management fuer Verschluesselung/Schluesselverwaltung.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V11.7.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V11.7 = Key Management.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "Korrektur von V14: V11.7 = Key Management fuer Verschluesselung/Schluesselverwaltung.", "version": "2026-06-25", "obligation_id": "auth_key_management"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.3", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.3", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.4", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.3.4", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(k) — Sicherheitsrelevante Ereignisse / Logging", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V16.1.1", "mapping_type": "supports", "mapping_status": "accepted", "provenance": "human_curated", "rationale": "V16 = Security Logging.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V16 = Logging, sauberer Treffer fuer sicherheitsrelevante Ereignisse.", "version": "2026-06-25", "obligation_id": "event_logging_security_events"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, kein Auth — verworfen.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, kein Auth — verworfen.", "version": "2026-06-25"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.2.4", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
|
||||||
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.3.2", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
|
{"source_norm": "CRA Annex I Part I (2)(d) — Vertraulichkeit / Verschluesselung", "source_role": "operational_requirement", "target_framework": "OWASP ASVS", "target_control": "V14.3.2", "mapping_type": "related", "mapping_status": "rejected", "provenance": "human_curated", "rationale": "Retriever-Kandidat.", "reviewed_by": "benjamin", "review_date": "2026-06-25", "review_reason": "V14 = Config, Crypto gehoert zu V11 — verworfen.", "version": "2026-06-25"}
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
// Evidence-Requirements je NIST-SP-800-53-Control (Schema: EvidenceRequirement). Eine Zeile = eine geforderte Evidenz.
|
||||||
|
// WICHTIG: evidence_type ist FRAMEWORK-AGNOSTISCH (geteilter Katalog config_export/test_report/repo_scan/sbom/...) —
|
||||||
|
// dieselben Typen tragen CRA, NIST, ISO 27001, IEC 62443, BSI. (framework, control) ist nur der Verweis, nicht der Typ.
|
||||||
|
// Stand 2026-06-25, Basis: die 3 accepted CRA->NIST primary_implementation-Mappings (SI-7 Integritaet, SI-2 Updates, CM-7 Angriffsflaeche).
|
||||||
|
{"framework": "NIST SP 800-53", "control": "SI-7", "evidence_type": "sbom", "evidence_source": "ci", "freshness_requirement": "per_release", "required": true, "rationale": "SBOM weist die Integritaet/Herkunft der Software-Bestandteile nach (bekannte, unmanipulierte Komponenten).", "version": "2026-06-25"}
|
||||||
|
{"framework": "NIST SP 800-53", "control": "SI-7", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Secure-Boot-/Code-Signing-Konfiguration als Nachweis der Integritaetspruefung.", "version": "2026-06-25"}
|
||||||
|
{"framework": "NIST SP 800-53", "control": "SI-2", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Konfiguration des sicheren Update-/Patch-Mechanismus (signierte/automatische Updates) als technischer Nachweis.", "version": "2026-06-25"}
|
||||||
|
{"framework": "NIST SP 800-53", "control": "SI-2", "evidence_type": "test_report", "evidence_source": "ci", "freshness_requirement": "per_release", "required": true, "rationale": "Update-/Patch-Verifikationstest (CI) belegt, dass Sicherheitsupdates greifen.", "version": "2026-06-25"}
|
||||||
|
{"framework": "NIST SP 800-53", "control": "CM-7", "evidence_type": "config_export", "evidence_source": "github", "freshness_requirement": "per_release", "required": true, "rationale": "Konfiguration deaktivierter Ports/Dienste/Funktionen als Nachweis minimierter Angriffsflaeche.", "version": "2026-06-25"}
|
||||||
|
{"framework": "NIST SP 800-53", "control": "CM-7", "evidence_type": "repo_scan", "evidence_source": "scanner", "freshness_requirement": "per_release", "required": true, "rationale": "Angriffsflaechen-Scan (offene Ports/Dienste) als Nachweis tatsaechlich minimierter Angriffsflaeche.", "version": "2026-06-25"}
|
||||||
@@ -0,0 +1,846 @@
|
|||||||
|
{
|
||||||
|
"schema_version": "obligation_join_keys_v1",
|
||||||
|
"contract": "obligation_id ist der stabile Join-Key. Legal Knowledge Graph haengt citation_spans an obligation_id; Compliance Execution Graph mappt control_mapping.source_norm -> obligation_id. Interim-Bruecke = citation_units. obligation_id NIE neu vergeben (re-link).",
|
||||||
|
"count": 95,
|
||||||
|
"obligation_ids": [
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_creation",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_dependency_coverage",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Art. 3(36) i.V.m. Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_format_standard",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_maintenance_update",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_completeness_verification",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_tooling_automation",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "IMPLEMENTATION"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_access_provision",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_authority_provision",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Art. 31 / Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_confidentiality",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Art. 31(4)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_supply_chain_contracts",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "sbom_technical_documentation",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "sbom",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Art. 31 i.V.m. Annex VII"
|
||||||
|
],
|
||||||
|
"source_role": "EVIDENCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "vuln_identification_inventory",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "vuln_assessment_prioritization",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "vuln_remediation_patching",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (2) & (8)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "vuln_handling_process",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Article 13(8) & Annex VII"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "coordinated_vulnerability_disclosure",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (5)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "exploited_vuln_reporting_authorities",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Article 14 & Article 16"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "vuln_info_dissemination_users",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "vuln",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part II (4) & (6)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "attack_surface_minimization",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "core",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(j)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "software_integrity_protection",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "core",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(f)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "user_authentication_required",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(d)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "authentication_policy_documented",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "auth_exceptions_documented",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "mfa_required",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "step_up_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "privileged_op_reauth",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "strong_crypto_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(e)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "credential_lifecycle_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "credential_confidentiality_protection",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(e)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "password_policy",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "no_default_credentials",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(a)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "account_lockout_failed_attempts",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "server_side_validation",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "session_binding_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "reauth_after_inactivity",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "token_validation_lifecycle",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "mutual_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "revocation_check",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "encrypted_auth_channel",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(e)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "tls_certificate_auth",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "service_to_service_auth",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "auth_key_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "biometric_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "federated_auth_assertions",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "separate_authn_authz",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "supplier_access_auth",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "personal_admin_accounts",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "firmware_software_authentication",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "authentication",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(c)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "event_logging_security_events",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "access_control_event_logging",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "audit_trail_admin_actions",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_integrity_immutability",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_access_control_protection",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_retention_archival",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "centralized_log_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_monitoring_alerting",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I Part I (2)(k)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_data_minimization_privacy",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_format_standardization",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_timestamp_synchronization",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "logging_availability_resilience",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "logging_thread_safety_correctness",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "IMPLEMENTATION"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "logging_library_supply_chain",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "logging_config_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "logging_governance_roles",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "incident_response_logging",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "log_transmission_security",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "network_traffic_logging",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "logging",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_control_least_privilege",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(2)(d)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_confidentiality_integrity",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(2)(b)(c)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_session_management",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_mfa",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_encryption",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "reject_insecure_remote_protocols",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_logging_audit",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(2)(g)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_user_validation_ot",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_training",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_architecture_design",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_attack_surface_min",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(2)(a)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_vuln_patch_mgmt",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(1)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_threat_detection",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_maintenance_governance",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "temporary_remote_access_mgmt",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_data_export_protection",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "component_remote_interface_security",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "remote_access_fallback_concept",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "remote_access",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "provide_security_updates",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(c)",
|
||||||
|
"Art. 13"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "support_period_maintenance",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Art. 13(8)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "signed_update_integrity",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(3)(f)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "trusted_update_source",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(3)(d)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "update_testing_validation",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "update_rollback",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "GUIDANCE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "automatic_updates_optout",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (2)(c)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "update_risk_assessment",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "LEGAL_MINIMUM",
|
||||||
|
"citation_units": [
|
||||||
|
"Annex I (1)(2)"
|
||||||
|
],
|
||||||
|
"source_role": "LEGAL_BASIS"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obligation_id": "secure_modification_control",
|
||||||
|
"regulation": "CRA",
|
||||||
|
"family": "updates",
|
||||||
|
"tier": "BEST_PRACTICE",
|
||||||
|
"citation_units": [],
|
||||||
|
"source_role": "IMPLEMENTATION"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
package handlers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/ucca"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ComplianceGraphHandlers serves the read-only Compliance Execution Graph
|
||||||
|
// (Regulation -> Obligation -> Control -> Evidence) over the file-backed bridge artifacts.
|
||||||
|
// It is intentionally SEPARATE from the DB-backed ObligationsHandlers: this is the curated
|
||||||
|
// cross-session graph (Registry join keys + accepted control mappings + evidence requirements),
|
||||||
|
// loaded once at startup. Fail-closed: if the graph could not load, every request answers 503.
|
||||||
|
type ComplianceGraphHandlers struct {
|
||||||
|
joins *ucca.ObligationJoinKeys
|
||||||
|
mappings *ucca.ControlMappingSet
|
||||||
|
evidence *ucca.EvidenceRequirementSet
|
||||||
|
loadErr error
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewComplianceGraphHandlers loads the graph once. Construction never fails; a load error is
|
||||||
|
// retained and surfaced as 503 per request (matches the codebase's load-warn-continue startup).
|
||||||
|
func NewComplianceGraphHandlers() *ComplianceGraphHandlers {
|
||||||
|
joins, mappings, evidence, err := ucca.LoadComplianceGraph()
|
||||||
|
return &ComplianceGraphHandlers{joins: joins, mappings: mappings, evidence: evidence, loadErr: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadError exposes a startup load failure so the wiring can log a warning.
|
||||||
|
func (h *ComplianceGraphHandlers) LoadError() error { return h.loadErr }
|
||||||
|
|
||||||
|
// RegisterRoutes mounts the compliance-graph routes under /compliance.
|
||||||
|
func (h *ComplianceGraphHandlers) RegisterRoutes(r *gin.RouterGroup) {
|
||||||
|
g := r.Group("/compliance")
|
||||||
|
g.GET("/obligation-status", h.ObligationStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
// cgControlDTO is the JSON shape of a single control entry inside the
// obligation-status response.
type cgControlDTO struct {
	Framework        string   `json:"framework"`
	Control          string   `json:"control"`
	MappingType      string   `json:"mapping_type"`
	EvidenceRequired []string `json:"evidence_required"`
	// EvidenceStatus is one of: missing | partial | present | none_required.
	EvidenceStatus string `json:"evidence_status"`
}
|
||||||
|
|
||||||
|
type cgStatusResponse struct {
|
||||||
|
ObligationID string `json:"obligation_id"`
|
||||||
|
OverallStatus string `json:"overall_status"` // unknown_obligation | unmapped | not_assessed | open | met
|
||||||
|
LegalBasis []string `json:"legal_basis,omitempty"`
|
||||||
|
CitationSpans string `json:"citation_spans"` // "pending" until the Legal-KG attaches spans
|
||||||
|
Controls []cgControlDTO `json:"controls"`
|
||||||
|
Note string `json:"note,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ObligationStatus answers GET /sdk/v1/compliance/obligation-status?obligation_id=...
|
||||||
|
//
|
||||||
|
// It NEVER asserts fulfillment automatically. With no evidence collection wired (MVP), a mapped
|
||||||
|
// obligation is "not_assessed" and every required evidence is "missing" — the honest picture is
|
||||||
|
// "required vs present evidence", not "a document exists". Fail-closed otherwise:
|
||||||
|
// - no obligation_id -> 400
|
||||||
|
// - graph not loaded -> 503
|
||||||
|
// - id not in the Registry -> 200 overall_status=unknown_obligation
|
||||||
|
// - mapped but no control yet -> 200 overall_status=unmapped
|
||||||
|
func (h *ComplianceGraphHandlers) ObligationStatus(c *gin.Context) {
|
||||||
|
if h.loadErr != nil {
|
||||||
|
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "compliance graph unavailable", "detail": h.loadErr.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
obID := strings.TrimSpace(c.Query("obligation_id"))
|
||||||
|
if obID == "" {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "obligation_id query parameter required"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
resp := cgStatusResponse{ObligationID: obID, CitationSpans: "pending", Controls: []cgControlDTO{}}
|
||||||
|
|
||||||
|
if h.joins.FindObligation(obID) == nil {
|
||||||
|
resp.OverallStatus = "unknown_obligation"
|
||||||
|
resp.Note = "obligation_id not in the Registry join-key contract"
|
||||||
|
c.JSON(http.StatusOK, resp)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// MVP: hasEvidence=nil -> no collection wired -> all required evidence counts as missing.
|
||||||
|
st := ucca.AssessObligationStatus(h.joins, h.mappings, h.evidence, obID, nil)
|
||||||
|
resp.LegalBasis = st.LegalBasis
|
||||||
|
|
||||||
|
if len(st.Controls) == 0 {
|
||||||
|
resp.OverallStatus = "unmapped"
|
||||||
|
resp.Note = "no accepted control maps to this obligation yet"
|
||||||
|
c.JSON(http.StatusOK, resp)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cs := range st.Controls {
|
||||||
|
types := make([]string, 0, len(cs.RequiredEvidence))
|
||||||
|
for _, e := range cs.RequiredEvidence {
|
||||||
|
types = append(types, e.EvidenceType)
|
||||||
|
}
|
||||||
|
resp.Controls = append(resp.Controls, cgControlDTO{
|
||||||
|
Framework: cs.Framework,
|
||||||
|
Control: cs.Control,
|
||||||
|
MappingType: cs.MappingType,
|
||||||
|
EvidenceRequired: types,
|
||||||
|
EvidenceStatus: cgEvidenceStatus(len(cs.RequiredEvidence), len(cs.MissingEvidence)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
// No fulfillment claim without real evidence collection.
|
||||||
|
resp.OverallStatus = "not_assessed"
|
||||||
|
resp.Note = "evidence collection not wired (MVP) — fulfillment not asserted"
|
||||||
|
c.JSON(http.StatusOK, resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// cgEvidenceStatus maps the number of required and missing evidence items of a
// control to a status label: "none_required" when nothing is required,
// "present" when nothing is missing, "missing" when everything is missing,
// and "partial" otherwise.
func cgEvidenceStatus(required, missing int) string {
	if required == 0 {
		return "none_required"
	}
	if missing == 0 {
		return "present"
	}
	if missing == required {
		return "missing"
	}
	return "partial"
}
|
||||||
@@ -0,0 +1,133 @@
|
|||||||
|
package handlers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newComplianceGraphTestRouter(t *testing.T) *gin.Engine {
|
||||||
|
t.Helper()
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
h := NewComplianceGraphHandlers()
|
||||||
|
if err := h.LoadError(); err != nil {
|
||||||
|
t.Fatalf("compliance graph failed to load (candidate paths): %v", err)
|
||||||
|
}
|
||||||
|
r := gin.New()
|
||||||
|
h.RegisterRoutes(r.Group("/sdk/v1"))
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func getObligationStatus(t *testing.T, r *gin.Engine, query string) (int, cgStatusResponse) {
|
||||||
|
t.Helper()
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
req, _ := http.NewRequest(http.MethodGet, "/sdk/v1/compliance/obligation-status"+query, nil)
|
||||||
|
r.ServeHTTP(w, req)
|
||||||
|
var resp cgStatusResponse
|
||||||
|
if w.Code == http.StatusOK {
|
||||||
|
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatalf("decode body %q: %v", w.Body.String(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return w.Code, resp
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestObligationStatus(t *testing.T) {
|
||||||
|
r := newComplianceGraphTestRouter(t)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
query string
|
||||||
|
wantHTTP int
|
||||||
|
wantOverall string
|
||||||
|
wantControls bool // expect >=1 control
|
||||||
|
}{
|
||||||
|
{"missing param -> 400", "", http.StatusBadRequest, "", false},
|
||||||
|
{"unknown id -> unknown_obligation", "?obligation_id=does_not_exist", http.StatusOK, "unknown_obligation", false},
|
||||||
|
{"mapped (OWASP V6) -> not_assessed", "?obligation_id=user_authentication_required", http.StatusOK, "not_assessed", true},
|
||||||
|
{"NIST adopted (SI-2) -> not_assessed", "?obligation_id=provide_security_updates", http.StatusOK, "not_assessed", true},
|
||||||
|
{"CORE attack_surface_minimization -> CM-7", "?obligation_id=attack_surface_minimization", http.StatusOK, "not_assessed", true},
|
||||||
|
{"CORE software_integrity_protection -> SI-7", "?obligation_id=software_integrity_protection", http.StatusOK, "not_assessed", true},
|
||||||
|
{"in registry, no control -> unmapped", "?obligation_id=sbom_creation", http.StatusOK, "unmapped", false},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
code, resp := getObligationStatus(t, r, tt.query)
|
||||||
|
if code != tt.wantHTTP {
|
||||||
|
t.Fatalf("http %d, want %d", code, tt.wantHTTP)
|
||||||
|
}
|
||||||
|
if tt.wantHTTP != http.StatusOK {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if resp.OverallStatus != tt.wantOverall {
|
||||||
|
t.Errorf("overall_status=%q, want %q", resp.OverallStatus, tt.wantOverall)
|
||||||
|
}
|
||||||
|
if tt.wantControls && len(resp.Controls) == 0 {
|
||||||
|
t.Error("expected >=1 control")
|
||||||
|
}
|
||||||
|
if !tt.wantControls && len(resp.Controls) != 0 {
|
||||||
|
t.Errorf("expected 0 controls, got %d", len(resp.Controls))
|
||||||
|
}
|
||||||
|
if resp.CitationSpans != "pending" {
|
||||||
|
t.Errorf("citation_spans=%q, want pending", resp.CitationSpans)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The MVP must NEVER auto-assert fulfillment: with no evidence collection wired, every required
|
||||||
|
// evidence is "missing" and the overall status stays "not_assessed".
|
||||||
|
func TestObligationStatus_NoFulfillmentClaim(t *testing.T) {
|
||||||
|
r := newComplianceGraphTestRouter(t)
|
||||||
|
code, resp := getObligationStatus(t, r, "?obligation_id=user_authentication_required")
|
||||||
|
if code != http.StatusOK {
|
||||||
|
t.Fatalf("http %d", code)
|
||||||
|
}
|
||||||
|
if resp.OverallStatus == "met" || resp.OverallStatus == "erfuellt" {
|
||||||
|
t.Fatalf("MVP must not assert fulfillment, got overall_status=%q", resp.OverallStatus)
|
||||||
|
}
|
||||||
|
for _, ctl := range resp.Controls {
|
||||||
|
if len(ctl.EvidenceRequired) > 0 && ctl.EvidenceStatus != "missing" {
|
||||||
|
t.Errorf("control %s/%s evidence_status=%q, want missing (no collection wired)", ctl.Framework, ctl.Control, ctl.EvidenceStatus)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pin the curated evidence_required set per NIST obligation. A required:false row silently
|
||||||
|
// drops from evidence_required, which the table test above (control-count only) would miss.
|
||||||
|
func TestObligationStatus_NISTEvidenceTypes(t *testing.T) {
|
||||||
|
r := newComplianceGraphTestRouter(t)
|
||||||
|
want := map[string][]string{
|
||||||
|
"attack_surface_minimization": {"config_export", "repo_scan"},
|
||||||
|
"software_integrity_protection": {"sbom", "config_export"},
|
||||||
|
"provide_security_updates": {"config_export", "test_report"},
|
||||||
|
}
|
||||||
|
for ob, exp := range want {
|
||||||
|
_, resp := getObligationStatus(t, r, "?obligation_id="+ob)
|
||||||
|
if len(resp.Controls) != 1 {
|
||||||
|
t.Fatalf("%s: want 1 control, got %d", ob, len(resp.Controls))
|
||||||
|
}
|
||||||
|
if got := resp.Controls[0].EvidenceRequired; !sameStringSet(got, exp) {
|
||||||
|
t.Errorf("%s evidence_required = %v, want %v", ob, got, exp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sameStringSet reports whether a and b contain the same strings, ignoring
// order. Elements are matched one-to-one (by count), so a duplicate in one
// slice cannot stand in for a value that is missing from it: {"x","y"} and
// {"x","x"} are different. For inputs without duplicates this is plain set
// equality.
func sameStringSet(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	remaining := make(map[string]int, len(a))
	for _, s := range a {
		remaining[s]++
	}
	for _, s := range b {
		if remaining[s] == 0 {
			return false
		}
		// Consume one occurrence; with equal lengths, consuming every element
		// of b successfully means both slices hold the same elements.
		remaining[s]--
	}
	return true
}
|
||||||
@@ -153,6 +153,12 @@ func buildRouter(cfg *config.Config, pool *pgxpool.Pool) *gin.Engine {
|
|||||||
ragHandlers := handlers.NewRAGHandlers(corpusVersionStore)
|
ragHandlers := handlers.NewRAGHandlers(corpusVersionStore)
|
||||||
obligationsHandlers := handlers.NewObligationsHandlersWithStore(obligationsStore)
|
obligationsHandlers := handlers.NewObligationsHandlersWithStore(obligationsStore)
|
||||||
|
|
||||||
|
// Compliance Execution Graph (file-backed: Registry join keys + accepted control mappings + evidence)
|
||||||
|
complianceGraphHandlers := handlers.NewComplianceGraphHandlers()
|
||||||
|
if err := complianceGraphHandlers.LoadError(); err != nil {
|
||||||
|
log.Printf("WARNING: compliance graph not loaded (obligation-status -> 503): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Regulatory News
|
// Regulatory News
|
||||||
allV2Regs, err := ucca.LoadAllV2Regulations()
|
allV2Regs, err := ucca.LoadAllV2Regulations()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -201,7 +207,8 @@ func buildRouter(cfg *config.Config, pool *pgxpool.Pool) *gin.Engine {
|
|||||||
uccaHandlers, escalationHandlers, obligationsHandlers, ragHandlers,
|
uccaHandlers, escalationHandlers, obligationsHandlers, ragHandlers,
|
||||||
roadmapHandlers, workshopHandlers, portfolioHandlers,
|
roadmapHandlers, workshopHandlers, portfolioHandlers,
|
||||||
academyHandlers, trainingHandlers, whistleblowerHandlers, iaceHandler,
|
academyHandlers, trainingHandlers, whistleblowerHandlers, iaceHandler,
|
||||||
gapHandler, maximizerHandlers, regulatoryNewsHandlers, useCaseHandler)
|
gapHandler, maximizerHandlers, regulatoryNewsHandlers, useCaseHandler,
|
||||||
|
complianceGraphHandlers)
|
||||||
|
|
||||||
return router
|
return router
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ func registerRoutes(
|
|||||||
maximizerHandlers *handlers.MaximizerHandlers,
|
maximizerHandlers *handlers.MaximizerHandlers,
|
||||||
regulatoryNewsHandlers *handlers.RegulatoryNewsHandlers,
|
regulatoryNewsHandlers *handlers.RegulatoryNewsHandlers,
|
||||||
useCaseHandler *handlers.UseCaseHandler,
|
useCaseHandler *handlers.UseCaseHandler,
|
||||||
|
complianceGraphHandlers *handlers.ComplianceGraphHandlers,
|
||||||
) {
|
) {
|
||||||
v1 := router.Group("/sdk/v1")
|
v1 := router.Group("/sdk/v1")
|
||||||
{
|
{
|
||||||
@@ -54,6 +55,7 @@ func registerRoutes(
|
|||||||
registerMaximizerRoutes(v1, maximizerHandlers)
|
registerMaximizerRoutes(v1, maximizerHandlers)
|
||||||
registerUseCaseRoutes(v1, useCaseHandler)
|
registerUseCaseRoutes(v1, useCaseHandler)
|
||||||
v1.GET("/regulatory-news", regulatoryNewsHandlers.GetNews)
|
v1.GET("/regulatory-news", regulatoryNewsHandlers.GetNews)
|
||||||
|
complianceGraphHandlers.RegisterRoutes(v1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
|
|||||||
Token string `json:"token"`
|
Token string `json:"token"`
|
||||||
Field string `json:"field"`
|
Field string `json:"field"`
|
||||||
PatternIDs []string `json:"pattern_ids"`
|
PatternIDs []string `json:"pattern_ids"`
|
||||||
|
// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
|
||||||
|
// ranked by frequency — the candidate tags a keyword_dictionary entry for this
|
||||||
|
// token would emit so narratives mentioning it can trigger those patterns.
|
||||||
|
SuggestedTags []string `json:"suggested_tags,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type VocabularyReport struct {
|
type VocabularyReport struct {
|
||||||
|
|||||||
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {
|
|||||||
|
|
||||||
// For each unknown token check if any pattern names it
|
// For each unknown token check if any pattern names it
|
||||||
patterns := iace.AllPatterns()
|
patterns := iace.AllPatterns()
|
||||||
|
byID := make(map[string]iace.HazardPattern, len(patterns))
|
||||||
|
for _, p := range patterns {
|
||||||
|
byID[p.ID] = p
|
||||||
|
}
|
||||||
for _, tok := range report.UnknownTokens {
|
for _, tok := range report.UnknownTokens {
|
||||||
hits := patternsMentioning(tok, patterns)
|
hits := patternsMentioning(tok, patterns)
|
||||||
if len(hits) == 0 {
|
if len(hits) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
|
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
|
||||||
Token: tok,
|
Token: tok,
|
||||||
PatternIDs: hits,
|
PatternIDs: hits,
|
||||||
|
SuggestedTags: suggestTagsFor(hits, byID),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
|
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
|
||||||
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
|
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
|
||||||
// harm/zone text contains the token (case-insensitive substring).
|
// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
|
||||||
|
// is essential: a substring match flags common fragments like "stehen" inside
|
||||||
|
// "entstehen", producing spurious hits and nonsensical tag suggestions.
|
||||||
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
||||||
tokLower := strings.ToLower(tok)
|
tokLower := strings.ToLower(tok)
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
var out []string
|
var out []string
|
||||||
for _, p := range patterns {
|
for _, p := range patterns {
|
||||||
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
|
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
|
||||||
if !strings.Contains(hay, tokLower) {
|
matched := false
|
||||||
continue
|
for _, w := range tokenRE.FindAllString(hay, -1) {
|
||||||
|
if w == tokLower {
|
||||||
|
matched = true
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if seen[p.ID] {
|
if !matched || seen[p.ID] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[p.ID] = true
|
seen[p.ID] = true
|
||||||
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
|||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// suggestTagsFor returns the RequiredComponentTags shared across the naming
|
||||||
|
// patterns, ranked by how many of them require each tag (ties broken by name),
|
||||||
|
// top 3. These are the candidate tags a dictionary entry for the token should
|
||||||
|
// emit so a narrative mentioning the token can trigger those patterns.
|
||||||
|
func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
|
||||||
|
freq := map[string]int{}
|
||||||
|
total := 0
|
||||||
|
for _, id := range ids {
|
||||||
|
p, ok := byID[id]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
total++
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, tag := range p.RequiredComponentTags {
|
||||||
|
if seen[tag] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[tag] = true
|
||||||
|
freq[tag]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if total == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
type tf struct {
|
||||||
|
tag string
|
||||||
|
n int
|
||||||
|
}
|
||||||
|
ranked := make([]tf, 0, len(freq))
|
||||||
|
for t, n := range freq {
|
||||||
|
ranked = append(ranked, tf{t, n})
|
||||||
|
}
|
||||||
|
sort.Slice(ranked, func(i, j int) bool {
|
||||||
|
if ranked[i].n != ranked[j].n {
|
||||||
|
return ranked[i].n > ranked[j].n
|
||||||
|
}
|
||||||
|
return ranked[i].tag < ranked[j].tag
|
||||||
|
})
|
||||||
|
// Only suggest a tag shared by >= 40% of the naming patterns. Diffuse tokens
|
||||||
|
// (common verbs spread across categories) get no dominant tag and are dropped.
|
||||||
|
var out []string
|
||||||
|
for _, x := range ranked {
|
||||||
|
if float64(x.n)/float64(total) < 0.4 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
out = append(out, x.tag)
|
||||||
|
if len(out) >= 3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
package audit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{
|
||||||
|
"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
|
||||||
|
"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
|
||||||
|
"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
|
||||||
|
}
|
||||||
|
got := suggestTagsFor([]string{"P1", "P2", "P3"}, byID)
|
||||||
|
if len(got) == 0 || got[0] != "backflow_risk" {
|
||||||
|
t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{
|
||||||
|
"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
|
||||||
|
}
|
||||||
|
got := suggestTagsFor([]string{"P1"}, byID)
|
||||||
|
if len(got) != 3 || got[0] != "a" || got[1] != "b" || got[2] != "c" {
|
||||||
|
t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
|
||||||
|
byID := map[string]iace.HazardPattern{}
|
||||||
|
if got := suggestTagsFor([]string{"missing"}, byID); len(got) != 0 {
|
||||||
|
t.Fatalf("want empty for unknown patterns, got %v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,8 +7,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
|
||||||
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
|
|||||||
// patternsToHazardsAndMitigations converts a pattern match output into the
|
// patternsToHazardsAndMitigations converts a pattern match output into the
|
||||||
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
|
||||||
// iace_handler_init.go does in production but without DB writes.
|
// iace_handler_init.go does in production but without DB writes.
|
||||||
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
|
||||||
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
|
||||||
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
|
||||||
|
|
||||||
for _, pm := range out.MatchedPatterns {
|
|
||||||
cat := ""
|
|
||||||
if len(pm.HazardCats) > 0 {
|
|
||||||
cat = pm.HazardCats[0]
|
|
||||||
}
|
|
||||||
zone := pm.ZoneDE
|
|
||||||
lifecycle := ""
|
|
||||||
if len(pm.ApplicableLifecycles) > 0 {
|
|
||||||
lifecycle = pm.ApplicableLifecycles[0]
|
|
||||||
}
|
|
||||||
h := Hazard{
|
|
||||||
ID: uuid.New(),
|
|
||||||
Name: pm.ScenarioDE,
|
|
||||||
Category: cat,
|
|
||||||
Description: pm.ScenarioDE,
|
|
||||||
Scenario: pm.ScenarioDE,
|
|
||||||
TriggerEvent: pm.TriggerDE,
|
|
||||||
PossibleHarm: pm.HarmDE,
|
|
||||||
AffectedPerson: pm.AffectedDE,
|
|
||||||
HazardousZone: zone,
|
|
||||||
LifecyclePhase: lifecycle,
|
|
||||||
}
|
|
||||||
if h.Name == "" {
|
|
||||||
h.Name = pm.PatternName
|
|
||||||
}
|
|
||||||
hazards = append(hazards, h)
|
|
||||||
patternToHazard[pm.PatternID] = h.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
measureNames := make(map[string]string)
|
|
||||||
for _, m := range GetProtectiveMeasureLibrary() {
|
|
||||||
measureNames[m.ID] = m.Name
|
|
||||||
}
|
|
||||||
|
|
||||||
var mitigations []Mitigation
|
|
||||||
for _, sm := range out.SuggestedMeasures {
|
|
||||||
name := measureNames[sm.MeasureID]
|
|
||||||
if name == "" {
|
|
||||||
name = sm.MeasureID
|
|
||||||
}
|
|
||||||
for _, srcPattern := range sm.SourcePatterns {
|
|
||||||
hid, ok := patternToHazard[srcPattern]
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
mitigations = append(mitigations, Mitigation{
|
|
||||||
ID: uuid.New(),
|
|
||||||
HazardID: hid,
|
|
||||||
Name: name,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hazards, mitigations
|
|
||||||
}
|
|
||||||
|
|
||||||
func abbrev(s string, max int) string {
|
func abbrev(s string, max int) string {
|
||||||
if len(s) <= max {
|
if len(s) <= max {
|
||||||
return s
|
return s
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package iace
|
package iace
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{
|
|||||||
|
|
||||||
// warewashingEngineOutput runs the production chain and returns the filtered
|
// warewashingEngineOutput runs the production chain and returns the filtered
|
||||||
// hazards/mitigations the user would see for the UC-M.
|
// hazards/mitigations the user would see for the UC-M.
|
||||||
func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
|
func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
|
||||||
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
||||||
|
|
||||||
var compIDs, compNames []string
|
var compIDs, compNames []string
|
||||||
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
|
|||||||
filtered := *out
|
filtered := *out
|
||||||
filtered.MatchedPatterns = kept
|
filtered.MatchedPatterns = kept
|
||||||
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
|
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
|
||||||
return hazards, mitigations, len(kept)
|
return hazards, mitigations, kept
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWarewashing_GTCoverage(t *testing.T) {
|
func TestWarewashing_GTCoverage(t *testing.T) {
|
||||||
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
|
|||||||
t.Logf("Parsed components: %v", cn)
|
t.Logf("Parsed components: %v", cn)
|
||||||
}
|
}
|
||||||
|
|
||||||
hazards, mitigations, nPatterns := warewashingEngineOutput()
|
hazards, mitigations, keptPatterns := warewashingEngineOutput()
|
||||||
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
|
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))
|
||||||
|
|
||||||
result := CompareBenchmark(>, hazards, mitigations)
|
result := CompareBenchmark(>, hazards, mitigations)
|
||||||
precision := 0.0
|
precision := 0.0
|
||||||
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
|
|||||||
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
|
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestWarewashing_DedupProposer exercises the offline dedup-candidate proposer
|
||||||
|
// end-to-end on the real warewashing engine output: detect candidates, screen
|
||||||
|
// each against the GT, and log the human-review queue. It asserts the WALL is
|
||||||
|
// self-consistent — a PASS verdict may never coincide with a recall drop.
|
||||||
|
func TestWarewashing_DedupProposer(t *testing.T) {
|
||||||
|
raw, err := os.ReadFile(filepath.Join("testdata", "ground_truth_warewashing.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read GT: %v", err)
|
||||||
|
}
|
||||||
|
var gt GroundTruth
|
||||||
|
if err := json.Unmarshal(raw, >); err != nil {
|
||||||
|
t.Fatalf("parse GT: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
hazards, mits, kept := warewashingEngineOutput()
|
||||||
|
byID := map[string]PatternMatch{}
|
||||||
|
for _, pm := range kept {
|
||||||
|
byID[pm.PatternID] = pm
|
||||||
|
}
|
||||||
|
// 0.25 is a deliberately permissive candidate threshold: the proposer is meant
|
||||||
|
// to over-surface, because the deterministic GT wall below (and a human, and the
|
||||||
|
// LLM judge) is the precision filter — not the detector.
|
||||||
|
candidates := FindDedupCandidates(kept, 0.25)
|
||||||
|
t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
|
||||||
|
|
||||||
|
// Deterministic judge in the test; the dev-time CLI swaps in LLMJudge.
|
||||||
|
judge := HeuristicJudge{}
|
||||||
|
var judged []JudgedProposal
|
||||||
|
blocked := 0
|
||||||
|
for _, c := range candidates {
|
||||||
|
sr := ScreenSupersession(>, hazards, mits, c.KeepHazardName, c.DropName)
|
||||||
|
switch {
|
||||||
|
case sr.RecallAfter < sr.RecallBefore:
|
||||||
|
t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", c.KeepPattern, c.DropPattern)
|
||||||
|
blocked++
|
||||||
|
case sr.DistinctGT:
|
||||||
|
t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", sr.KeepGT, sr.DropGT, c.KeepPattern, c.DropPattern)
|
||||||
|
blocked++
|
||||||
|
default:
|
||||||
|
if !sr.Safe {
|
||||||
|
t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", c.DropPattern)
|
||||||
|
}
|
||||||
|
v, conf, rat := judge.Judge(context.Background(), c, byID[c.KeepPattern], byID[c.DropPattern])
|
||||||
|
judged = append(judged, JudgedProposal{
|
||||||
|
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", judged))
|
||||||
|
t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
|
||||||
|
len(judged), judge.Name(), blocked)
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "sort"
|
||||||
|
|
||||||
|
// isoCategoryRank assigns each hazard category a sort rank that follows the
// EN ISO 12100 hazard groups. Without it the hazard log is returned in
// pattern-firing order, which reads as a jumble. Ranking groups the hazards
// top-down by type (A. mechanical, B. electrical, C. thermal, ...), matching
// the frontend CATEGORY_LABELS. Categories that share a rank are treated as
// the same position by the sort.
var isoCategoryRank = map[string]int{
	// A. Mechanical
	"mechanical_hazard":  10,
	"mechanical":         10,
	"maintenance_hazard": 11,

	// B. Electrical
	"electrical_hazard": 20,
	"electrical":        20,
	"emc_hazard":        21,

	// C. Thermal
	"thermal_hazard":   30,
	"thermal":          30,
	"high_temperature": 31,
	"fire_explosion":   32,

	// D. Pneumatics / hydraulics
	"pneumatic_hydraulic": 40,

	// E. Noise / vibration
	"noise_hazard":     50,
	"noise_vibration":  50,
	"vibration_hazard": 51,

	// F. Ergonomics
	"ergonomic_hazard": 60,
	"ergonomic":        60,

	// G. Substances / environment
	"material_environmental": 70,
	"chemical_risk":          71,
	"radiation_hazard":       72,

	// H. Software / control (functional safety)
	"software_control":        80,
	"software_fault":          80,
	"safety_function_failure": 81,
	"configuration_error":     82,
	"sensor_fault":            83,
	"hmi_error":               84,
	"mode_confusion":          85,
	"communication_failure":   86,
	"update_failure":          87,

	// I. Cyber / network (listed for ordering completeness; excluded from the CE log)
	"unauthorized_access":   90,
	"firmware_corruption":   91,
	"cyber_resilience":      92,
	"cyber_network":         93,
	"logging_audit_failure": 94,
	"sensor_spoofing":       95,

	// J. AI-specific
	"ai_specific":          100,
	"ai_misclassification": 100,
	"false_classification": 100,
	"model_drift":          100,
	"data_poisoning":       100,
	"unintended_bias":      100,
}
|
||||||
|
|
||||||
|
func categoryRank(cat string) int {
|
||||||
|
if r, ok := isoCategoryRank[cat]; ok {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
return 999 // unknown categories last
|
||||||
|
}
|
||||||
|
|
||||||
|
// SortHazardsByISO12100 groups hazards by ISO 12100 hazard group. Stable: the
|
||||||
|
// relative order within a group (creation/priority order from the engine) is
|
||||||
|
// preserved.
|
||||||
|
func SortHazardsByISO12100(hazards []Hazard) {
|
||||||
|
sort.SliceStable(hazards, func(i, j int) bool {
|
||||||
|
return categoryRank(hazards[i].Category) < categoryRank(hazards[j].Category)
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
|
|||||||
// ════════════════════════════════════════════════════════════════
|
// ════════════════════════════════════════════════════════════════
|
||||||
{
|
{
|
||||||
ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
|
ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
|
||||||
RequiredComponentTags: []string{"stored_energy"},
|
RequiredComponentTags: []string{"pneumatic_part"},
|
||||||
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
RequiredEnergyTags: []string{"pneumatic_pressure"},
|
||||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M485", "M534", "M527"},
|
SuggestedMeasureIDs: []string{"M485", "M534", "M527"},
|
||||||
|
|||||||
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
// ================================================================
|
// ================================================================
|
||||||
{
|
{
|
||||||
ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
|
ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
|
||||||
RequiredComponentTags: []string{"stored_energy", "high_temperature"},
|
RequiredComponentTags: []string{"battery", "high_temperature"},
|
||||||
RequiredEnergyTags: []string{"electrical_energy", "thermal"},
|
RequiredEnergyTags: []string{"electrical_energy", "thermal"},
|
||||||
GeneratedHazardCats: []string{"thermal_hazard", "electrical_hazard"},
|
GeneratedHazardCats: []string{"thermal_hazard", "electrical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M005", "M141"},
|
SuggestedMeasureIDs: []string{"M005", "M141"},
|
||||||
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
|
ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
|
||||||
RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
|
RequiredComponentTags: []string{"battery", "chemical_risk"},
|
||||||
RequiredEnergyTags: []string{},
|
RequiredEnergyTags: []string{},
|
||||||
GeneratedHazardCats: []string{"material_environmental"},
|
GeneratedHazardCats: []string{"material_environmental"},
|
||||||
SuggestedMeasureIDs: []string{"M005", "M141"},
|
SuggestedMeasureIDs: []string{"M005", "M141"},
|
||||||
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
|
ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
|
||||||
RequiredComponentTags: []string{"stored_energy", "electrical_part"},
|
RequiredComponentTags: []string{"battery", "electrical_part"},
|
||||||
RequiredEnergyTags: []string{"electrical_energy"},
|
RequiredEnergyTags: []string{"electrical_energy"},
|
||||||
GeneratedHazardCats: []string{"electrical_hazard"},
|
GeneratedHazardCats: []string{"electrical_hazard"},
|
||||||
SuggestedMeasureIDs: []string{"M082", "M141"},
|
SuggestedMeasureIDs: []string{"M082", "M141"},
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
|
|||||||
{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
|
{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
|
||||||
{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
|
{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
|
||||||
{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
|
{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
|
||||||
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
|
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
|
||||||
{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
|
{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
|
||||||
{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
|
{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
|
||||||
{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
|
{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
|
||||||
|
|||||||
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
|
|||||||
}
|
}
|
||||||
return p.ApplicableLifecycles
|
return p.ApplicableLifecycles
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// genericSupersededByWarewashing is the set of generic pattern IDs that are
// dropped when the warewashing domain is present, because a domain-specific
// pattern already describes the same hazard more precisely.
//
// The suppression is scoped to the domain tag: machines outside the domain
// keep the generic pattern, so the rule cannot regress other machine classes.
var genericSupersededByWarewashing = map[string]bool{
	// Stored electrical energy -> HP144 (residual voltage). HP013's zone is
	// framed for Batteriefaecher/USV-Anlagen, which a dishwasher does not
	// have; HP144 is the Frequenzumrichter/Zwischenkreis variant.
	"HP013": true,

	// Generic hot surfaces -> HP2201 (Boiler/Tank/Spuelkammer).
	"HP016": true,

	// Actuator burn -> HP2201 (same contact-burn hazard).
	"HP018": true,
}
|
||||||
|
|
||||||
|
// supersededByDomainSpecific reports whether a generic pattern is replaced by a
|
||||||
|
// more precise equivalent that the project's domain already provides.
|
||||||
|
func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
|
||||||
|
return tagSet["dom_warewashing"] && genericSupersededByWarewashing[p.ID]
|
||||||
|
}
|
||||||
|
|||||||
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Domain-specific supersession (generic duplicate replaced by a precise one).
|
||||||
|
if supersededByDomainSpecific(p, tagSet) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,143 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
|
||||||
|
//
|
||||||
|
// Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
|
||||||
|
// groups; H-J are control/CRA and routinely routed elsewhere) did the engine
|
||||||
|
// leave with ZERO hazards for this machine? An empty group is a structural
|
||||||
|
// blind-spot signal — the machine may genuinely lack that hazard, or a pattern
|
||||||
|
// may be missing. The LLM then expands each gap into specific expected-but-missing
|
||||||
|
// hazards a safety assessor would name, for a human to confirm into a new pattern
|
||||||
|
// or GT case. The gaps alone are useful without any model.
|
||||||
|
|
||||||
|
// isoGroup describes one hazard group and the hazard categories that count as
// covering it.
type isoGroup struct {
	// Key is the short identifier reported for the group; Label is its
	// human-readable name.
	Key, Label string
	// Cats lists the hazard categories belonging to the group.
	Cats []string
}
|
||||||
|
|
||||||
|
var iso12100Groups = []isoGroup{
|
||||||
|
{"mechanical", "A. Mechanisch", []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
|
||||||
|
{"electrical", "B. Elektrisch", []string{"electrical_hazard", "electrical", "emc_hazard"}},
|
||||||
|
{"thermal", "C. Thermisch", []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
|
||||||
|
{"pneumatic_hydraulic", "D. Pneumatik/Hydraulik", []string{"pneumatic_hydraulic"}},
|
||||||
|
{"noise_vibration", "E. Laerm/Vibration", []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
|
||||||
|
{"ergonomic", "F. Ergonomie", []string{"ergonomic_hazard", "ergonomic"}},
|
||||||
|
{"material", "G. Stoffe/Umwelt", []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
// CoverageGap records one ISO 12100 hazard group for which the engine produced
// no hazard at all.
type CoverageGap struct {
	// Group is the human-readable group label.
	Group string `json:"group"`
	// Key is the short group identifier.
	Key string `json:"key"`
	// Note is the reviewer-facing hint attached to the gap.
	Note string `json:"note"`
}
|
||||||
|
|
||||||
|
// FindCoverageGaps returns the A-G hazard groups that produced zero hazards.
|
||||||
|
func FindCoverageGaps(hazards []Hazard) []CoverageGap {
|
||||||
|
present := make(map[string]bool, len(hazards))
|
||||||
|
for _, h := range hazards {
|
||||||
|
present[h.Category] = true
|
||||||
|
}
|
||||||
|
var gaps []CoverageGap
|
||||||
|
for _, g := range iso12100Groups {
|
||||||
|
covered := false
|
||||||
|
for _, c := range g.Cats {
|
||||||
|
if present[c] {
|
||||||
|
covered = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !covered {
|
||||||
|
gaps = append(gaps, CoverageGap{
|
||||||
|
Group: g.Label, Key: g.Key,
|
||||||
|
Note: "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return gaps
|
||||||
|
}
|
||||||
|
|
||||||
|
// MissingHazard is one LLM-proposed hazard that a safety assessor would expect
// for the machine but that the engine did not produce.
type MissingHazard struct {
	// Group is the hazard group the proposal belongs to.
	Group string `json:"group"`
	// Hazard is the proposed hazard itself.
	Hazard string `json:"hazard"`
	// Why is the model's justification for expecting the hazard.
	Why string `json:"why"`
}
|
||||||
|
|
||||||
|
// ProposeMissingHazards asks the LLM to expand the empty groups into specific
|
||||||
|
// expected hazards. Returns nil without a completer or on any error — propose-only,
|
||||||
|
// never breaks the run.
|
||||||
|
func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
|
||||||
|
if completer == nil || len(gaps) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
system, user := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
|
||||||
|
raw, err := completer.Complete(ctx, system, user)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return parseMissingHazards(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildCoveragePrompt frames the "what is missing?" question for the LLM.
|
||||||
|
func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
|
||||||
|
system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
|
||||||
|
"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
|
||||||
|
"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
|
||||||
|
"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
|
||||||
|
`[{"group":"...","hazard":"...","why":"..."}].`
|
||||||
|
|
||||||
|
var have []string
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, h := range produced {
|
||||||
|
if h.Category != "" && !seen[h.Category] {
|
||||||
|
seen[h.Category] = true
|
||||||
|
have = append(have, h.Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var empty []string
|
||||||
|
for _, g := range gaps {
|
||||||
|
empty = append(empty, g.Group)
|
||||||
|
}
|
||||||
|
user = fmt.Sprintf("Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
|
||||||
|
machineClass, narrative, strings.Join(have, ", "), strings.Join(empty, ", "))
|
||||||
|
return system, user
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMissingHazards(raw string) []MissingHazard {
|
||||||
|
start, end := strings.Index(raw, "["), strings.LastIndex(raw, "]")
|
||||||
|
if start < 0 || end <= start {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var out []MissingHazard
|
||||||
|
if err := json.Unmarshal([]byte(raw[start:end+1]), &out); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// RenderCoverageQueue renders the deterministic gaps plus any LLM-proposed missing
|
||||||
|
// hazards as a markdown review queue.
|
||||||
|
func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Coverage blind-spot queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps))
|
||||||
|
for _, g := range gaps {
|
||||||
|
fmt.Fprintf(&b, "- **%s** — %s\n", g.Group, g.Note)
|
||||||
|
}
|
||||||
|
if len(missing) > 0 {
|
||||||
|
fmt.Fprintf(&b, "\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing))
|
||||||
|
for i, m := range missing {
|
||||||
|
fmt.Fprintf(&b, "%d. [%s] %s\n - why: %s\n", i+1, m.Group, m.Hazard, m.Why)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFindCoverageGaps(t *testing.T) {
|
||||||
|
hazards := []Hazard{
|
||||||
|
{Category: "mechanical_hazard"},
|
||||||
|
{Category: "thermal_hazard"},
|
||||||
|
{Category: "electrical_hazard"},
|
||||||
|
{Category: "material_environmental"},
|
||||||
|
}
|
||||||
|
gapKeys := map[string]bool{}
|
||||||
|
for _, g := range FindCoverageGaps(hazards) {
|
||||||
|
gapKeys[g.Key] = true
|
||||||
|
}
|
||||||
|
for _, want := range []string{"pneumatic_hydraulic", "noise_vibration", "ergonomic"} {
|
||||||
|
if !gapKeys[want] {
|
||||||
|
t.Errorf("expected gap %s", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, notWant := range []string{"mechanical", "thermal", "electrical", "material"} {
|
||||||
|
if gapKeys[notWant] {
|
||||||
|
t.Errorf("did not expect gap %s (covered)", notWant)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
|
||||||
|
produced := []Hazard{{Category: "thermal_hazard"}}
|
||||||
|
gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
|
||||||
|
system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
|
||||||
|
if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
|
||||||
|
t.Errorf("system prompt missing framing")
|
||||||
|
}
|
||||||
|
for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
|
||||||
|
if !strings.Contains(user, want) {
|
||||||
|
t.Errorf("user prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
|
||||||
|
gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
|
||||||
|
c := fakeCompleter{out: `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`}
|
||||||
|
got := ProposeMissingHazards(context.Background(), c, "x", "n", nil, gaps)
|
||||||
|
if len(got) != 1 || got[0].Hazard != "Heben schwerer Koerbe" {
|
||||||
|
t.Fatalf("parse: got %+v", got)
|
||||||
|
}
|
||||||
|
if ProposeMissingHazards(context.Background(), nil, "x", "n", nil, gaps) != nil {
|
||||||
|
t.Errorf("nil completer must return nil")
|
||||||
|
}
|
||||||
|
if ProposeMissingHazards(context.Background(), fakeCompleter{err: context.DeadlineExceeded}, "x", "n", nil, gaps) != nil {
|
||||||
|
t.Errorf("error must return nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
|
||||||
|
//
|
||||||
|
// It inspects the patterns that fired for one machine and proposes which look
|
||||||
|
// like duplicates, so a human (later an LLM) can decide a supersession/merge. It
|
||||||
|
// NEVER mutates the pattern library or the runtime — it only surfaces candidates.
|
||||||
|
// The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
|
||||||
|
// wall that proves a proposal is safe before a human ever sees it.
|
||||||
|
//
|
||||||
|
// Detection here is purely structural (category + zone + measure + scenario
|
||||||
|
// overlap) and therefore reproducible. Two safety rules bake in what P1 taught
|
||||||
|
// us about the dishwasher review:
|
||||||
|
// - only patterns with the SAME primary category are ever compared;
|
||||||
|
// - a pair with DIFFERENT operational states is NEVER proposed, because
|
||||||
|
// normal-operation and maintenance are legitimately distinct contexts with
|
||||||
|
// different protective measures (e.g. HP011 vs HP077). Merging them would
|
||||||
|
// erase the maintenance view.
|
||||||
|
|
||||||
|
// DedupCandidate describes one proposed near-duplicate pair of fired patterns:
// which pattern would survive, which would be superseded, and the overlap
// figures behind the proposal.
type DedupCandidate struct {
	// KeepPattern is the ID of the higher-priority survivor.
	KeepPattern string `json:"keep_pattern"`
	// DropPattern is the ID of the supersession target.
	DropPattern string `json:"drop_pattern"`
	// KeepName is the name of the surviving pattern.
	KeepName string `json:"keep_name"`
	// KeepHazardName is the keep pattern's ScenarioDE (for the
	// GT-distinctness screen).
	KeepHazardName string `json:"keep_hazard_name"`
	// DropName equals the generated hazard name (ScenarioDE) of the drop
	// pattern.
	DropName string `json:"drop_name"`
	// Category is the primary hazard category shared by both patterns.
	Category string `json:"category"`
	// ZoneJaccard, MeasureJaccard and ScenarioJaccard are the individual
	// overlap values; Score is their weighted combination.
	ZoneJaccard     float64 `json:"zone_jaccard"`
	MeasureJaccard  float64 `json:"measure_jaccard"`
	ScenarioJaccard float64 `json:"scenario_jaccard"`
	Score           float64 `json:"score"`
	// Rationale is a one-line human-readable summary of the proposal.
	Rationale string `json:"rationale"`
}
|
||||||
|
|
||||||
|
// FindDedupCandidates compares the fired patterns pairwise and returns near-dup
|
||||||
|
// candidates whose combined overlap score meets threshold, deterministically
|
||||||
|
// ordered (score desc, then drop-pattern id). The combined score weights measure
|
||||||
|
// overlap highest (shared measures are the strongest duplicate signal), then zone
|
||||||
|
// and scenario equally.
|
||||||
|
func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
|
||||||
|
var out []DedupCandidate
|
||||||
|
for i := 0; i < len(fired); i++ {
|
||||||
|
for j := i + 1; j < len(fired); j++ {
|
||||||
|
a, b := fired[i], fired[j]
|
||||||
|
ca := primaryCat(a)
|
||||||
|
if ca == "" || ca != primaryCat(b) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !sameOpStateSet(a.OperationalStates, b.OperationalStates) {
|
||||||
|
continue // legitimate lifecycle variants — never propose a merge
|
||||||
|
}
|
||||||
|
zj := tokenJaccard(zoneTokenSet(a.ZoneDE), zoneTokenSet(b.ZoneDE))
|
||||||
|
mj := tokenJaccard(toSet(a.SuggestedMeasureIDs), toSet(b.SuggestedMeasureIDs))
|
||||||
|
sj := tokenJaccard(wordTokenSet(a.ScenarioDE), wordTokenSet(b.ScenarioDE))
|
||||||
|
score := 0.4*mj + 0.3*zj + 0.3*sj
|
||||||
|
if score < threshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
keep, drop := a, b
|
||||||
|
if b.Priority > a.Priority {
|
||||||
|
keep, drop = b, a
|
||||||
|
}
|
||||||
|
out = append(out, DedupCandidate{
|
||||||
|
KeepPattern: keep.PatternID, DropPattern: drop.PatternID,
|
||||||
|
KeepName: keep.PatternName, KeepHazardName: keep.ScenarioDE, DropName: drop.ScenarioDE,
|
||||||
|
Category: ca, ZoneJaccard: round2(zj), MeasureJaccard: round2(mj),
|
||||||
|
ScenarioJaccard: round2(sj), Score: round2(score),
|
||||||
|
Rationale: fmt.Sprintf(
|
||||||
|
"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
|
||||||
|
ca, mj*100, zj*100, sj*100, keep.PatternID, keep.Priority, drop.PatternID, drop.Priority),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.SliceStable(out, func(i, j int) bool {
|
||||||
|
if out[i].Score != out[j].Score {
|
||||||
|
return out[i].Score > out[j].Score
|
||||||
|
}
|
||||||
|
return out[i].DropPattern < out[j].DropPattern
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func primaryCat(pm PatternMatch) string {
|
||||||
|
if len(pm.HazardCats) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return pm.HazardCats[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
func sameOpStateSet(a, b []string) bool {
|
||||||
|
sa, sb := toSet(a), toSet(b)
|
||||||
|
if len(sa) != len(sb) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for k := range sa {
|
||||||
|
if !sb[k] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// proposerWordSplit matches runs of one or more non-letter characters (\p{L}
// is the Unicode letter class). It is the separator used to split text into
// words.
var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)
|
||||||
|
|
||||||
|
// zoneTokenSet lowercases a comma-separated zone string and returns the set of
// its trimmed terms. Terms shorter than three runes are dropped.
func zoneTokenSet(zone string) map[string]bool {
	terms := make(map[string]bool)
	lowered := strings.ToLower(zone)
	for _, piece := range strings.Split(lowered, ",") {
		term := strings.TrimSpace(piece)
		if len([]rune(term)) < 3 {
			continue
		}
		terms[term] = true
	}
	return terms
}
|
||||||
|
|
||||||
|
// wordTokenSet tokenises free text into words of length >= 4 (drops connectives).
|
||||||
|
func wordTokenSet(s string) map[string]bool {
|
||||||
|
out := map[string]bool{}
|
||||||
|
for _, w := range proposerWordSplit.Split(strings.ToLower(s), -1) {
|
||||||
|
if len([]rune(w)) >= 4 {
|
||||||
|
out[w] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// tokenJaccard returns |a ∩ b| / |a ∪ b|, or 0 when both sets are empty.
// A key of a counts as shared only if b maps it to true.
func tokenJaccard(a, b map[string]bool) float64 {
	shared := 0
	for token := range a {
		if b[token] {
			shared++
		}
	}

	// The union size is zero exactly when both inputs are empty.
	total := len(a) + len(b) - shared
	if total == 0 {
		return 0
	}
	return float64(shared) / float64(total)
}
|
||||||
|
|
||||||
|
// round2 rounds x to two decimal places using math.Round.
func round2(x float64) float64 {
	const scale = 100
	return math.Round(x*scale) / scale
}
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
|
||||||
|
return PatternMatch{
|
||||||
|
PatternID: id, PatternName: id, Priority: prio,
|
||||||
|
HazardCats: []string{cat}, ZoneDE: zone, ScenarioDE: scenario,
|
||||||
|
SuggestedMeasureIDs: measures, OperationalStates: opstates,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
|
||||||
|
[]string{"M138", "M146"}, nil),
|
||||||
|
mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
|
||||||
|
[]string{"M138", "M146", "M141"}, nil),
|
||||||
|
mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
|
||||||
|
[]string{"M003"}, nil),
|
||||||
|
}
|
||||||
|
got := FindDedupCandidates(fired, 0.4)
|
||||||
|
if len(got) != 1 {
|
||||||
|
t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
// Higher-priority pattern survives, lower one is the drop target.
|
||||||
|
if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
|
||||||
|
t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
|
||||||
|
}
|
||||||
|
if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
|
||||||
|
t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
|
||||||
|
// Same category, zone and measures — but normal-operation vs maintenance.
|
||||||
|
// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
|
||||||
|
[]string{"M481", "M482"}, nil),
|
||||||
|
mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
|
||||||
|
[]string{"M481", "M482"}, []string{"maintenance"}),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
|
||||||
|
t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
|
||||||
|
mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.3); len(got) != 0 {
|
||||||
|
t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
|
||||||
|
mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
|
||||||
|
}
|
||||||
|
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
|
||||||
|
t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
|
||||||
|
//
|
||||||
|
// A pattern can fire for a machine yet describe its hazard with a zone text
|
||||||
|
// framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
|
||||||
|
// "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
|
||||||
|
// through terms that are NOT yet in domainGateTerms — once a term is a gate term,
|
||||||
|
// the ghost-pattern invariant already fences the pattern out. So we surface the
|
||||||
|
// candidates structurally: zone terms a fired pattern names that the machine's
|
||||||
|
// narrative never mentions (minus generic hazard-location vocabulary). A human
|
||||||
|
// (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
|
||||||
|
//
|
||||||
|
// This OVER-surfaces by design — the human/LLM is the precision filter, not the
|
||||||
|
// detector (same contract as the dedup proposer).
|
||||||
|
|
||||||
|
// genericHazardStop is the set of generic hazard-location words. Such words
// legitimately appear in zone texts without being repeated in a narrative, so
// they are not treated as evidence of foreign framing.
var genericHazardStop = map[string]bool{
	// Hazard points.
	"quetschstelle": true,
	"einzugstelle":  true,
	"einzugsstelle": true,
	"scherstelle":   true,
	"schneidstelle": true,
	"stossstelle":   true,
	"fangstelle":    true,
	"klemmstelle":   true,

	// Danger zones and areas.
	"gefahrbereich":   true,
	"gefahrenbereich": true,
	"gefahrstelle":    true,
	"gefahrenstelle":  true,
	"arbeitsbereich":  true,
	"wirkbereich":     true,
	"schutzbereich":   true,
	"umgebung":        true,
	"bereich":         true,
	"zugang":          true,

	// Generic machine vocabulary.
	"oberflaeche":  true,
	"oberflaechen": true,
	"gehaeuse":     true,
	"bauteil":      true,
	"bauteile":     true,
	"komponente":   true,
	"maschine":     true,
}
|
||||||
|
|
||||||
|
// FramingCandidate describes one fired pattern whose zone text looks as if it
// was written for a different machine.
type FramingCandidate struct {
	// Pattern and Name identify the fired pattern.
	Pattern string `json:"pattern"`
	Name    string `json:"name"`
	// Category is the pattern's primary hazard category.
	Category string `json:"category"`
	// Zone is the pattern's zone text as fired.
	Zone string `json:"zone"`
	// OrphanTerms are the zone terms without an echo in the narrative;
	// OrphanFraction is their share of all zone terms.
	OrphanTerms    []string `json:"orphan_terms"`
	OrphanFraction float64  `json:"orphan_fraction"`
	// Verdict is the heuristic lean: foreign | plausible.
	Verdict string `json:"verdict"`
	// Evidence is a one-line human-readable summary.
	Evidence string `json:"evidence"`
}
|
||||||
|
|
||||||
|
// FindFramingCandidates returns fired patterns whose zone is mostly not echoed in
|
||||||
|
// the narrative, sorted by orphan fraction descending (deterministic).
|
||||||
|
func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
|
||||||
|
nar := strings.ToLower(narrative)
|
||||||
|
var narStems []string
|
||||||
|
for _, w := range proposerWordSplit.Split(nar, -1) {
|
||||||
|
if len([]rune(w)) >= 5 {
|
||||||
|
narStems = append(narStems, w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var out []FramingCandidate
|
||||||
|
for _, pm := range fired {
|
||||||
|
parts := zoneParts(pm.ZoneDE)
|
||||||
|
if len(parts) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var orphans []string
|
||||||
|
for _, p := range parts {
|
||||||
|
if !partEchoed(p, nar, narStems) {
|
||||||
|
orphans = append(orphans, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
frac := float64(len(orphans)) / float64(len(parts))
|
||||||
|
if len(orphans) == 0 || frac < minFraction {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, FramingCandidate{
|
||||||
|
Pattern: pm.PatternID, Name: pm.PatternName, Category: primaryCat(pm),
|
||||||
|
Zone: pm.ZoneDE, OrphanTerms: orphans, OrphanFraction: round2(frac),
|
||||||
|
Verdict: framingHeuristicVerdict(frac),
|
||||||
|
Evidence: fmt.Sprintf("%d/%d zone terms have no narrative echo: %s", len(orphans), len(parts), strings.Join(orphans, ", ")),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
sort.SliceStable(out, func(i, j int) bool {
|
||||||
|
if out[i].OrphanFraction != out[j].OrphanFraction {
|
||||||
|
return out[i].OrphanFraction > out[j].OrphanFraction
|
||||||
|
}
|
||||||
|
return out[i].Pattern < out[j].Pattern
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// framingHeuristicVerdict maps an orphan fraction to a heuristic lean:
// "foreign" when (practically) nothing in the zone is echoed by the narrative
// (fraction >= 0.99), otherwise "plausible" — a partial echo that is likely
// generic vocabulary and left for a human to confirm.
func framingHeuristicVerdict(frac float64) string {
	verdict := "plausible"
	if frac >= 0.99 {
		verdict = "foreign"
	}
	return verdict
}
|
||||||
|
|
||||||
|
// zoneParts lowercases a zone string and splits it on commas, slashes,
// semicolons and parentheses. Each piece is trimmed; pieces shorter than four
// runes are dropped. Input order is preserved.
func zoneParts(zone string) []string {
	isSeparator := func(r rune) bool {
		switch r {
		case ',', '/', ';', '(', ')':
			return true
		}
		return false
	}

	var terms []string
	for _, piece := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
		term := strings.TrimSpace(piece)
		if len([]rune(term)) < 4 {
			continue
		}
		terms = append(terms, term)
	}
	return terms
}
|
||||||
|
|
||||||
|
// partEchoed reports whether a zone part is reflected in the narrative. Matching
|
||||||
|
// is bidirectional to survive German compounding: a zone word echoes if it is a
|
||||||
|
// generic hazard term, if it is a substring of the narrative, OR if any narrative
|
||||||
|
// stem (>= 5 chars) is a substring of the zone word (so narrative "Steuerung"
|
||||||
|
// echoes zone "Steuerungssystem").
|
||||||
|
func partEchoed(part, narrative string, narStems []string) bool {
|
||||||
|
for _, w := range strings.Fields(part) {
|
||||||
|
if genericHazardStop[w] {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if len([]rune(w)) < 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.Contains(narrative, w) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, ns := range narStems {
|
||||||
|
if strings.Contains(w, ns) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// RenderFramingQueue renders foreign-framing candidates as a markdown review queue.
|
||||||
|
func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Foreign-framing review queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates))
|
||||||
|
for i, c := range candidates {
|
||||||
|
fmt.Fprintf(&b, "## %d. %s — %s [%s, orphan %.0f%%]\n", i+1, c.Pattern, c.Name, c.Verdict, c.OrphanFraction*100)
|
||||||
|
fmt.Fprintf(&b, "- category: %s\n- zone: %s\n", c.Category, c.Zone)
|
||||||
|
fmt.Fprintf(&b, "- orphan terms (no narrative echo): %s\n", strings.Join(c.OrphanTerms, ", "))
|
||||||
|
fmt.Fprintf(&b, "- suggested action: %s\n\n", framingAction(c.Verdict))
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// framingAction returns the suggested reviewer action for a heuristic verdict.
// Any verdict other than "foreign" gets the partial-echo advice.
func framingAction(verdict string) string {
	switch verdict {
	case "foreign":
		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
	default:
		return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
	}
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
|
||||||
|
narrative := "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
|
||||||
|
mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
|
||||||
|
mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
|
||||||
|
}
|
||||||
|
got := FindFramingCandidates(fired, narrative, 0.6)
|
||||||
|
if len(got) != 1 || got[0].Pattern != "HPforeign" {
|
||||||
|
t.Fatalf("want only HPforeign flagged, got %+v", got)
|
||||||
|
}
|
||||||
|
if got[0].Verdict != "foreign" {
|
||||||
|
t.Errorf("fully-orphan zone should be 'foreign', got %s", got[0].Verdict)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
|
||||||
|
narrative := "Maschine mit Boiler und Tank."
|
||||||
|
fired := []PatternMatch{
|
||||||
|
mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil),
|
||||||
|
}
|
||||||
|
got := FindFramingCandidates(fired, narrative, 0.3)
|
||||||
|
if len(got) != 1 {
|
||||||
|
t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", len(got))
|
||||||
|
}
|
||||||
|
if got[0].Verdict != "plausible" || len(got[0].OrphanTerms) != 1 || got[0].OrphanTerms[0] != "auspuffleitung" {
|
||||||
|
t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", got[0].Verdict, got[0].OrphanTerms)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "github.com/google/uuid"
|
||||||
|
|
||||||
|
// Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
|
||||||
|
// narrative and produce the fired patterns + the engine-built hazards/mitigations
|
||||||
|
// the dedup proposer and GT screen consume. This is the same pipeline the GT
|
||||||
|
// benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
|
||||||
|
|
||||||
|
// universalLifecyclePhases is the fixed list of lifecycle phases that
// BuildProposerInput appends to the phases parsed from the narrative, so that
// patterns gated to a specific lifecycle (maintenance, cleaning, setup, fault
// clearing) still fire. The proposer wants the full hazard picture, not only
// normal-operation hazards.
var universalLifecyclePhases = []string{
	"normal_operation",
	"maintenance",
	"cleaning",
	"setup",
	"fault_clearing",
}
|
||||||
|
|
||||||
|
// BuildProposerInput parses a narrative, runs the pattern engine, keeps the
|
||||||
|
// narrative-relevant patterns, and returns the hazards, mitigations and fired
|
||||||
|
// patterns. NOTE: it does not apply the CE cyber-category skip, so the proposer
|
||||||
|
// view may include cyber/AI hazards that the CE log excludes — harmless for the
|
||||||
|
// GT recall screen (they match no CE ground-truth entry).
|
||||||
|
func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
|
||||||
|
res := ParseNarrative(narrative, machineType)
|
||||||
|
|
||||||
|
var compIDs, compNames, energyIDs []string
|
||||||
|
for _, c := range res.Components {
|
||||||
|
if c.Negated {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
compIDs = append(compIDs, c.LibraryID)
|
||||||
|
compNames = append(compNames, c.NameDE)
|
||||||
|
}
|
||||||
|
for _, e := range res.EnergySources {
|
||||||
|
energyIDs = append(energyIDs, e.SourceID)
|
||||||
|
}
|
||||||
|
|
||||||
|
machineTypes := append([]string{}, extraMachineTypes...)
|
||||||
|
if machineType != "" {
|
||||||
|
machineTypes = append(machineTypes, machineType)
|
||||||
|
}
|
||||||
|
lifecycles := append(append([]string{}, res.LifecyclePhases...), universalLifecyclePhases...)
|
||||||
|
|
||||||
|
out := NewPatternEngine().Match(MatchInput{
|
||||||
|
ComponentLibraryIDs: compIDs,
|
||||||
|
EnergySourceIDs: energyIDs,
|
||||||
|
LifecyclePhases: lifecycles,
|
||||||
|
CustomTags: res.CustomTags,
|
||||||
|
OperationalStates: res.OperationalStates,
|
||||||
|
StateTransitions: res.StateTransitions,
|
||||||
|
HumanRoles: res.Roles,
|
||||||
|
MachineTypes: machineTypes,
|
||||||
|
})
|
||||||
|
|
||||||
|
kept := make([]PatternMatch, 0, len(out.MatchedPatterns))
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
if IsPatternRelevant(pm, narrative, compNames) {
|
||||||
|
kept = append(kept, pm)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filtered := *out
|
||||||
|
filtered.MatchedPatterns = kept
|
||||||
|
hazards, mits := patternsToHazardsAndMitigations(&filtered)
|
||||||
|
return hazards, mits, kept
|
||||||
|
}
|
||||||
|
|
||||||
|
// patternsToHazardsAndMitigations converts engine output into the hazard/mitigation
|
||||||
|
// entities the benchmark + proposer compare on. Simplified vs InitializeProject
|
||||||
|
// (no risk estimation, no norm refs) — it only needs category/zone/scenario/measures.
|
||||||
|
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
|
||||||
|
hazards := make([]Hazard, 0, len(out.MatchedPatterns))
|
||||||
|
patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
|
||||||
|
|
||||||
|
for _, pm := range out.MatchedPatterns {
|
||||||
|
cat := ""
|
||||||
|
if len(pm.HazardCats) > 0 {
|
||||||
|
cat = pm.HazardCats[0]
|
||||||
|
}
|
||||||
|
lifecycle := ""
|
||||||
|
if len(pm.ApplicableLifecycles) > 0 {
|
||||||
|
lifecycle = pm.ApplicableLifecycles[0]
|
||||||
|
}
|
||||||
|
h := Hazard{
|
||||||
|
ID: uuid.New(),
|
||||||
|
Name: pm.ScenarioDE,
|
||||||
|
Category: cat,
|
||||||
|
Description: pm.ScenarioDE,
|
||||||
|
Scenario: pm.ScenarioDE,
|
||||||
|
TriggerEvent: pm.TriggerDE,
|
||||||
|
PossibleHarm: pm.HarmDE,
|
||||||
|
AffectedPerson: pm.AffectedDE,
|
||||||
|
HazardousZone: pm.ZoneDE,
|
||||||
|
LifecyclePhase: lifecycle,
|
||||||
|
}
|
||||||
|
if h.Name == "" {
|
||||||
|
h.Name = pm.PatternName
|
||||||
|
}
|
||||||
|
hazards = append(hazards, h)
|
||||||
|
patternToHazard[pm.PatternID] = h.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
measureNames := make(map[string]string)
|
||||||
|
for _, m := range GetProtectiveMeasureLibrary() {
|
||||||
|
measureNames[m.ID] = m.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
var mitigations []Mitigation
|
||||||
|
for _, sm := range out.SuggestedMeasures {
|
||||||
|
name := measureNames[sm.MeasureID]
|
||||||
|
if name == "" {
|
||||||
|
name = sm.MeasureID
|
||||||
|
}
|
||||||
|
for _, srcPattern := range sm.SourcePatterns {
|
||||||
|
hid, ok := patternToHazard[srcPattern]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mitigations = append(mitigations, Mitigation{
|
||||||
|
ID: uuid.New(),
|
||||||
|
HazardID: hid,
|
||||||
|
Name: name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hazards, mitigations
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestBuildProposerInput_WarewashingFires(t *testing.T) {
|
||||||
|
hazards, _, fired := BuildProposerInput(
|
||||||
|
warewashingNarrative,
|
||||||
|
"Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
|
||||||
|
[]string{"food_processing"},
|
||||||
|
)
|
||||||
|
if len(fired) == 0 || len(hazards) == 0 {
|
||||||
|
t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
|
||||||
|
}
|
||||||
|
has := func(id string) bool {
|
||||||
|
for _, pm := range fired {
|
||||||
|
if pm.PatternID == id {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !has("HP2201") {
|
||||||
|
t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
|
||||||
|
// propose-only. The deterministic GT wall (proposer_screen.go) has already
|
||||||
|
// removed candidates that would drop recall or that credit different GT entries;
|
||||||
|
// the judge only adds an opinion on whether the survivors are truly the same
|
||||||
|
// hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
|
||||||
|
//
|
||||||
|
// The judge is pluggable behind CandidateJudge so the runtime/tests stay
|
||||||
|
// deterministic (HeuristicJudge) while the dev-time CLI can plug in the
|
||||||
|
// non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
|
||||||
|
|
||||||
|
// Verdict values a judge can report for a dedup candidate.
const (
	// VerdictDuplicate: both patterns describe the same hazard.
	VerdictDuplicate = "duplicate"
	// VerdictDistinct: the patterns describe different hazards.
	VerdictDistinct = "distinct"
	// VerdictUncertain: no decision; also the fallback for errors and unknown values.
	VerdictUncertain = "uncertain"
)
|
||||||
|
|
||||||
|
// JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
|
||||||
|
type JudgedProposal struct {
|
||||||
|
Candidate DedupCandidate `json:"candidate"`
|
||||||
|
Screen ScreenResult `json:"screen"`
|
||||||
|
Verdict string `json:"verdict"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Rationale string `json:"rationale"`
|
||||||
|
Judge string `json:"judge"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// CandidateJudge decides whether two near-duplicate patterns are the same hazard.
|
||||||
|
type CandidateJudge interface {
|
||||||
|
Name() string
|
||||||
|
Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
|
||||||
|
}
|
||||||
|
|
||||||
|
// HeuristicJudge is the deterministic default/fallback. It only ever returns "low"
|
||||||
|
// confidence — it is a placeholder for the LLM, and it deliberately punts to
|
||||||
|
// "uncertain" on the hard cases (low text overlap, shared measures) so the queue
|
||||||
|
// makes clear exactly where the LLM earns its keep.
|
||||||
|
type HeuristicJudge struct{}
|
||||||
|
|
||||||
|
func (HeuristicJudge) Name() string { return "heuristic" }
|
||||||
|
|
||||||
|
func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
|
||||||
|
switch {
|
||||||
|
case c.ScenarioJaccard >= 0.5 || (c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5):
|
||||||
|
return VerdictDuplicate, "low", "structural: high scenario, or combined zone+measure, overlap"
|
||||||
|
case c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3:
|
||||||
|
return VerdictDistinct, "low", "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
|
||||||
|
default:
|
||||||
|
return VerdictUncertain, "low", "structural signal inconclusive — needs the LLM judge"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLMJudge asks an offline model to make the semantic call. Non-deterministic, so
|
||||||
|
// it lives only in the dev-time tool, never in tests or the runtime. It degrades
|
||||||
|
// to "uncertain" on any transport or parse error — it must never break the run.
|
||||||
|
type LLMJudge struct {
|
||||||
|
Completer LLMCompleter
|
||||||
|
MachineClass string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (LLMJudge) Name() string { return "llm" }
|
||||||
|
|
||||||
|
func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
|
||||||
|
system, user := BuildJudgePrompt(j.MachineClass, a, b)
|
||||||
|
raw, err := j.Completer.Complete(ctx, system, user)
|
||||||
|
if err != nil {
|
||||||
|
return VerdictUncertain, "low", "LLM error: " + err.Error()
|
||||||
|
}
|
||||||
|
return parseJudgeJSON(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
|
||||||
|
// even though the call itself is not. It frames the ISO 12100 same-vs-distinct
|
||||||
|
// question and forces a JSON answer.
|
||||||
|
func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
|
||||||
|
system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
|
||||||
|
"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
|
||||||
|
"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
|
||||||
|
"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
|
||||||
|
"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
|
||||||
|
"Antworte AUSSCHLIESSLICH als JSON: " +
|
||||||
|
`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
|
||||||
|
user = fmt.Sprintf(`Maschinenklasse: %s
|
||||||
|
|
||||||
|
Gefaehrdung A (%s):
|
||||||
|
Name: %s
|
||||||
|
Kategorie: %s
|
||||||
|
Zone: %s
|
||||||
|
Szenario: %s
|
||||||
|
Ausloeser: %s
|
||||||
|
Schaden: %s
|
||||||
|
Massnahmen: %s
|
||||||
|
|
||||||
|
Gefaehrdung B (%s):
|
||||||
|
Name: %s
|
||||||
|
Kategorie: %s
|
||||||
|
Zone: %s
|
||||||
|
Szenario: %s
|
||||||
|
Ausloeser: %s
|
||||||
|
Schaden: %s
|
||||||
|
Massnahmen: %s
|
||||||
|
|
||||||
|
Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
|
||||||
|
machineClass,
|
||||||
|
a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
|
||||||
|
b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
|
||||||
|
return system, user
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
|
||||||
|
start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
|
||||||
|
if start < 0 || end <= start {
|
||||||
|
return VerdictUncertain, "low", "unparseable LLM output"
|
||||||
|
}
|
||||||
|
var v struct {
|
||||||
|
Verdict string `json:"verdict"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Rationale string `json:"rationale"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil {
|
||||||
|
return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
|
||||||
|
}
|
||||||
|
switch v.Verdict {
|
||||||
|
case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
|
||||||
|
default:
|
||||||
|
v.Verdict = VerdictUncertain
|
||||||
|
}
|
||||||
|
if v.Confidence == "" {
|
||||||
|
v.Confidence = "low"
|
||||||
|
}
|
||||||
|
return v.Verdict, v.Confidence, v.Rationale
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLMCompleter is the minimal text-in/text-out abstraction the LLM judge
// depends on. Tests supply a stub; the dev-time tool supplies the
// registry-backed adapter created by NewRegistryCompleter.
type LLMCompleter interface {
	// Complete sends the system and user prompt to the model and returns the
	// model's text reply, or an error.
	Complete(ctx context.Context, system, user string) (string, error)
}
|
||||||
|
|
||||||
|
type registryCompleter struct {
|
||||||
|
reg *llm.ProviderRegistry
|
||||||
|
model string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistryCompleter adapts the shared llm.ProviderRegistry to LLMCompleter so
|
||||||
|
// the proposer can reuse the platform's offline model wiring (e.g. self-hosted qwen).
|
||||||
|
func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
|
||||||
|
return ®istryCompleter{reg: reg, model: model}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
|
||||||
|
resp, err := rc.reg.Chat(ctx, &llm.ChatRequest{
|
||||||
|
Model: rc.model,
|
||||||
|
Messages: []llm.Message{
|
||||||
|
{Role: "system", Content: system},
|
||||||
|
{Role: "user", Content: user},
|
||||||
|
},
|
||||||
|
Temperature: 0,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return resp.Message.Content, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,104 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHeuristicJudge_Verdicts(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
zone, meas float64
|
||||||
|
scenario float64
|
||||||
|
wantVerdict string
|
||||||
|
}{
|
||||||
|
{"high scenario overlap -> duplicate", 0, 0.3, 0.6, VerdictDuplicate},
|
||||||
|
{"high zone+measure -> duplicate", 0.6, 0.6, 0.1, VerdictDuplicate},
|
||||||
|
{"identical measures, no text -> distinct", 0, 1.0, 0.0, VerdictDistinct},
|
||||||
|
{"shared measures, low text -> uncertain", 0, 0.67, 0.19, VerdictUncertain},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
c := DedupCandidate{ZoneJaccard: tt.zone, MeasureJaccard: tt.meas, ScenarioJaccard: tt.scenario}
|
||||||
|
v, conf, _ := HeuristicJudge{}.Judge(context.Background(), c, PatternMatch{}, PatternMatch{})
|
||||||
|
if v != tt.wantVerdict {
|
||||||
|
t.Errorf("verdict: want %s, got %s", tt.wantVerdict, v)
|
||||||
|
}
|
||||||
|
if conf != "low" {
|
||||||
|
t.Errorf("heuristic confidence must be low, got %s", conf)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
|
||||||
|
a := PatternMatch{PatternID: "HPa", PatternName: "Heisse Flaeche", HazardCats: []string{"thermal_hazard"},
|
||||||
|
ZoneDE: "Boiler", ScenarioDE: "Beruehrung heisser Boiler", SuggestedMeasureIDs: []string{"M071"}}
|
||||||
|
b := PatternMatch{PatternID: "HPb", PatternName: "Heisses Spuelgut", HazardCats: []string{"thermal_hazard"},
|
||||||
|
ZoneDE: "Spuelgut", ScenarioDE: "Beruehrung heisses Geschirr", SuggestedMeasureIDs: []string{"M071"}}
|
||||||
|
system, user := BuildJudgePrompt("Geschirrspuelmaschine", a, b)
|
||||||
|
|
||||||
|
for _, want := range []string{"EN ISO 12100", "JSON", "verdict"} {
|
||||||
|
if !strings.Contains(system, want) {
|
||||||
|
t.Errorf("system prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, want := range []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"} {
|
||||||
|
if !strings.Contains(user, want) {
|
||||||
|
t.Errorf("user prompt missing %q", want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeCompleter is a test stub for LLMCompleter that returns a canned reply
// and error regardless of the prompt.
type fakeCompleter struct {
	out string // reply text handed back by Complete
	err error  // error handed back by Complete
}

// Complete ignores its arguments and returns the canned reply and error.
func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) {
	return f.out, f.err
}
|
||||||
|
|
||||||
|
func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
|
||||||
|
cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
|
||||||
|
|
||||||
|
// Well-formed JSON, even wrapped in chatter, parses.
|
||||||
|
j := LLMJudge{Completer: fakeCompleter{out: "Sicher. {\"verdict\":\"distinct\",\"confidence\":\"high\",\"rationale\":\"andere Wirkorte\"}"}, MachineClass: "x"}
|
||||||
|
if v, conf, r := j.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictDistinct || conf != "high" || r != "andere Wirkorte" {
|
||||||
|
t.Errorf("parse: got %s/%s/%q", v, conf, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unknown verdict value normalises to uncertain.
|
||||||
|
j2 := LLMJudge{Completer: fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}}
|
||||||
|
if v, _, _ := j2.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
|
||||||
|
t.Errorf("unknown verdict must normalise to uncertain, got %s", v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transport error degrades gracefully, never panics.
|
||||||
|
j3 := LLMJudge{Completer: fakeCompleter{err: errors.New("connection refused")}}
|
||||||
|
if v, _, r := j3.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain || !strings.Contains(r, "LLM error") {
|
||||||
|
t.Errorf("error path: got %s / %q", v, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Garbage (no JSON) degrades to uncertain.
|
||||||
|
j4 := LLMJudge{Completer: fakeCompleter{out: "no json here"}}
|
||||||
|
if v, _, _ := j4.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
|
||||||
|
t.Errorf("garbage must degrade to uncertain, got %s", v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderProposalQueue_ShowsActions(t *testing.T) {
|
||||||
|
proposals := []JudgedProposal{
|
||||||
|
{
|
||||||
|
Candidate: DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
|
||||||
|
Screen: ScreenResult{RecallBefore: 1, RecallAfter: 1},
|
||||||
|
Verdict: VerdictDuplicate, Confidence: "medium", Rationale: "same update failure", Judge: "llm",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
out := RenderProposalQueue("Geschirrspuelmaschine", proposals)
|
||||||
|
for _, want := range []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"} {
|
||||||
|
if !strings.Contains(out, want) {
|
||||||
|
t.Errorf("queue missing %q\n%s", want, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RenderProposalQueue turns judged dedup proposals into the human-review queue
|
||||||
|
// (markdown). Deterministic. Nothing here applies a change — every entry is a
|
||||||
|
// suggestion for a human to confirm, edit, commit, and pin with a GT case.
|
||||||
|
func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# Dedup proposal queue — %s\n\n", machine)
|
||||||
|
fmt.Fprintf(&b, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))
|
||||||
|
|
||||||
|
for i, p := range proposals {
|
||||||
|
c := p.Candidate
|
||||||
|
fmt.Fprintf(&b, "## %d. keep %s ⊃ drop %s [%s → %s (%s)]\n",
|
||||||
|
i+1, c.KeepPattern, c.DropPattern, p.Judge, p.Verdict, p.Confidence)
|
||||||
|
fmt.Fprintf(&b, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
|
||||||
|
c.Category, c.Score, c.MeasureJaccard*100, c.ZoneJaccard*100, c.ScenarioJaccard*100)
|
||||||
|
fmt.Fprintf(&b, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
|
||||||
|
p.Screen.RecallBefore*100, p.Screen.RecallAfter*100, c.DropPattern, wallNote(p.Screen))
|
||||||
|
fmt.Fprintf(&b, "- keep: %s\n- drop: %s\n", c.KeepHazardName, c.DropName)
|
||||||
|
fmt.Fprintf(&b, "- judge rationale: %s\n", p.Rationale)
|
||||||
|
fmt.Fprintf(&b, "- suggested action: %s\n\n", suggestedAction(p))
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func wallNote(s ScreenResult) string {
|
||||||
|
if s.DistinctGT {
|
||||||
|
return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT)
|
||||||
|
}
|
||||||
|
return "recall-safe"
|
||||||
|
}
|
||||||
|
|
||||||
|
func suggestedAction(p JudgedProposal) string {
|
||||||
|
switch p.Verdict {
|
||||||
|
case VerdictDuplicate:
|
||||||
|
return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
|
||||||
|
case VerdictDistinct:
|
||||||
|
return "keep both — judge considers them distinct hazards"
|
||||||
|
default:
|
||||||
|
return "needs human (or higher-confidence LLM) review — no automatic action"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
import "github.com/google/uuid"
|
||||||
|
|
||||||
|
// ScreenResult holds the deterministic ground-truth (GT) verdict for one
// proposed supersession.
type ScreenResult struct {
	// RecallBefore is the coverage score with all hazards present.
	RecallBefore float64 `json:"recall_before"`
	// RecallAfter is the coverage score with the drop hazard removed.
	RecallAfter float64 `json:"recall_after"`
	// KeepGT is the GT entry the keeper credits (if any).
	KeepGT string `json:"keep_gt,omitempty"`
	// DropGT is the GT entry the drop credits (if any).
	DropGT string `json:"drop_gt,omitempty"`
	// DistinctGT is true when keep and drop credit DIFFERENT GT entries,
	// i.e. they are distinct hazards.
	DistinctGT bool `json:"distinct_gt"`
	// Safe is true when recall is preserved AND the hazards are not distinct.
	Safe bool `json:"safe"`
}
|
||||||
|
|
||||||
|
// ScreenSupersession is the WALL between "propose" and "decide". A proposal is
|
||||||
|
// safe only if BOTH deterministic checks pass:
|
||||||
|
//
|
||||||
|
// 1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
|
||||||
|
// — otherwise the drop is load-bearing for GT coverage.
|
||||||
|
// 2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
|
||||||
|
// is necessary but not sufficient: two genuinely distinct hazards that share
|
||||||
|
// the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
|
||||||
|
// recall at 100% when one is dropped, yet must NOT be merged. If keep and
|
||||||
|
// drop each match a different GT entry, they are distinct.
|
||||||
|
//
|
||||||
|
// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
|
||||||
|
// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
|
||||||
|
// CompareBenchmark; touches neither the library nor the runtime.
|
||||||
|
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
|
||||||
|
before := CompareBenchmark(gt, hazards, mits)
|
||||||
|
|
||||||
|
gtOf := map[string]string{}
|
||||||
|
for _, p := range before.MatchedPairs {
|
||||||
|
gtOf[p.EngineHazard.Name] = p.GTEntry.Nr
|
||||||
|
}
|
||||||
|
keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName]
|
||||||
|
distinct := keepGT != "" && dropGT != "" && keepGT != dropGT
|
||||||
|
|
||||||
|
kept := make([]Hazard, 0, len(hazards))
|
||||||
|
dropped := map[uuid.UUID]bool{}
|
||||||
|
for _, h := range hazards {
|
||||||
|
if h.Name == dropHazardName {
|
||||||
|
dropped[h.ID] = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
kept = append(kept, h)
|
||||||
|
}
|
||||||
|
keptMits := make([]Mitigation, 0, len(mits))
|
||||||
|
for _, m := range mits {
|
||||||
|
if !dropped[m.HazardID] {
|
||||||
|
keptMits = append(keptMits, m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
after := CompareBenchmark(gt, kept, keptMits)
|
||||||
|
|
||||||
|
return ScreenResult{
|
||||||
|
RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore,
|
||||||
|
KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct,
|
||||||
|
Safe: after.CoverageScore >= before.CoverageScore && !distinct,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
|
|||||||
hazards = append(hazards, h)
|
hazards = append(hazards, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SortHazardsByISO12100(hazards)
|
||||||
return hazards, nil
|
return hazards, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -110,9 +110,10 @@ type domainDef struct {
|
|||||||
// Deterministic order (slice, not map) — important for stable classification + tests.
|
// Deterministic order (slice, not map) — important for stable classification + tests.
|
||||||
var domains = []domainDef{
|
var domains = []domainDef{
|
||||||
{"data_protection",
|
{"data_protection",
|
||||||
[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
|
[]string{"DSGVO", "GDPR", "BDSG", "TDDDG", "TTDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
|
||||||
[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
|
[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
|
||||||
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeiter"}},
|
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit",
|
||||||
|
"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking"}},
|
||||||
{"cyber",
|
{"cyber",
|
||||||
[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
|
[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
|
||||||
[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
|
[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
|
||||||
@@ -126,6 +127,16 @@ var domains = []domainDef{
|
|||||||
nil},
|
nil},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// euPrimaryDomains is the set of query domains whose PRIMARY binding act is an
// EU regulation/directive (DSGVO, CRA/NIS2, AI Act, MaschinenVO). In these
// domains a NATIONAL implementing law (e.g. BDSG) is subsidiary for general
// questions — see nationalSubsidiarityPenalty.
var euPrimaryDomains = map[string]bool{
	"ai":              true,
	"cyber":           true,
	"data_protection": true,
	"product_safety":  true,
}
|
||||||
|
|
||||||
func queryDomain(query string) string {
|
func queryDomain(query string) string {
|
||||||
ql := strings.ToLower(query)
|
ql := strings.ToLower(query)
|
||||||
for _, d := range domains {
|
for _, d := range domains {
|
||||||
@@ -135,6 +146,16 @@ func queryDomain(query string) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Fallback: an explicit regulation mention (e.g. "DSGVO", "BDSG", "CRA") also signals the
|
||||||
|
// domain — so a question phrased around the act ("... gilt die DSGVO ...") is scoped even
|
||||||
|
// without a topical keyword. Keyword match wins first (more specific).
|
||||||
|
for _, d := range domains {
|
||||||
|
for _, reg := range d.regs {
|
||||||
|
if strings.Contains(ql, strings.ToLower(reg)) {
|
||||||
|
return d.name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -180,6 +201,11 @@ var topics = []topicDef{
|
|||||||
{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
|
{[]string{"bussgeld", "geldbusse"}, []string{"Art. 83"}},
|
||||||
{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
|
{[]string{"security update", "sicherheitsupdate", "schwachstelle", "sbom", "cybersicherheitsanforderung"}, []string{"CRA Anhang I"}},
|
||||||
{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
|
{[]string{"meldepflicht", "sicherheitsvorfall"}, []string{"Art. 14 CRA"}},
|
||||||
|
// ePrivacy / cookies: § 25 TDDDG (ex-TTDSG) is lex specialis for terminal-equipment access /
|
||||||
|
// cookie consent. Co-primary on a cookie/tracking query, so the subsidiarity rule does NOT
|
||||||
|
// demote it like general-DP DE law subsidiary to the DSGVO. Keywords are cookie-specific
|
||||||
|
// (NOT bare "Einwilligung") so a general consent question still resolves to Art. 7 DSGVO.
|
||||||
|
{[]string{"cookie", "endeinrichtung", "endgerät", "endgeraet", "tracking", "speicherung von informationen", "zugriff auf informationen"}, []string{"§ 25 TDDDG"}},
|
||||||
}
|
}
|
||||||
|
|
||||||
// resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
|
// resultMatchesTopic reports whether the result is a preferred norm of a topic the query hits.
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ const (
|
|||||||
domainMatchGain = 0.15
|
domainMatchGain = 0.15
|
||||||
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed)
|
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed)
|
||||||
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
|
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
|
||||||
|
subsidiarityPen = 0.18 // national implementing law (BDSG) on a general EU-primary question: SOFT demote, not exclusion
|
||||||
topicGain = 0.18 // amplifier only
|
topicGain = 0.18 // amplifier only
|
||||||
supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
|
supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
|
||||||
intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
|
intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
|
||||||
@@ -102,6 +103,15 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign
|
|||||||
if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
|
if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
|
||||||
score -= scopePenalty
|
score -= scopePenalty
|
||||||
}
|
}
|
||||||
|
// Subsidiarity: a national implementing law (DE binding, e.g. BDSG) is subsidiary to the
|
||||||
|
// primary EU act for GENERAL questions in an EU-primary domain — UNLESS the query hits a
|
||||||
|
// topic where the national norm is co-primary (DSB §38, special categories §22, ...). The
|
||||||
|
// topic boost below lifts those; here we only SOFT-demote the non-topic national norm, so
|
||||||
|
// it stays visible and can still win on a strongly matching topic. No hard exclusion.
|
||||||
|
if euPrimaryDomains[qDomain] && info.sourceClass == "binding_law" &&
|
||||||
|
info.jurisdiction == "DE" && !resultMatchesTopic(query, r) {
|
||||||
|
score -= subsidiarityPen
|
||||||
|
}
|
||||||
if resultMatchesTopic(query, r) {
|
if resultMatchesTopic(query, r) {
|
||||||
score += topicGain // Verstaerker, kein Override
|
score += topicGain // Verstaerker, kein Override
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,95 @@ func TestRerankByAuthority_Acceptance(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Subsidiarity (KB-2026.1 BDSG-pilot regression): a national implementing § that is NOT a
|
||||||
|
// co-primary topic norm must not outrank the primary DSGVO article on a general question.
|
||||||
|
t.Run("subsidiarity dp_05: BDSG §23 below DSGVO Art.6 (Rechtsgrundlage)", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 23 BDSG", "BDSG", "DE", 0.70),
|
||||||
|
bindingRes("Art. 6 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Welche Rechtsgrundlagen erlauben eine Verarbeitung personenbezogener Daten?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("DSGVO Art.6 must beat general BDSG §, got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
if len(out) != 2 {
|
||||||
|
t.Fatalf("BDSG must stay visible (soft demote), got len=%d", len(out))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("subsidiarity dp_08: BDSG §70 below DSGVO Art.28 (Auftragsverarbeitung)", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 70 BDSG", "BDSG", "DE", 0.70), // Teil 3 → scope + subsidiarity
|
||||||
|
bindingRes("Art. 28 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Was muss ein Auftragsverarbeitungsvertrag enthalten?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("DSGVO Art.28 must beat BDSG §70, got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("subsidiarity dp_11: BDSG §22 below DSGVO Art.32 on a TOM question", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 22 BDSG", "BDSG", "DE", 0.70),
|
||||||
|
bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Welche technischen und organisatorischen Massnahmen verlangt das Datenschutzrecht?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("DSGVO Art.32 must beat BDSG §22 on a non-topic TOM question, got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("cr_07: a 'DSGVO' mention scopes the domain so BDSG Teil-3 §64 is demoted", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 64 BDSG", "BDSG", "DE", 0.70), // Teil 3 (law enforcement)
|
||||||
|
bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
// Query has no DP keyword but names the DSGVO → domain fallback scopes it data_protection,
|
||||||
|
// so scope+subsidiarity demote the law-enforcement § below the primary norm.
|
||||||
|
out := rerankByAuthority("Welche rechtliche Grundlage gilt fuer technische und organisatorische Massnahmen - DSGVO oder ein Standard?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("DSGVO must win on a DSGVO-mention question, got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("ePrivacy: a cookie query lifts §25 TDDDG above DSGVO consent (lex specialis topic)", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.70), // higher semantic
|
||||||
|
bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Wann ist eine Einwilligung fuer das Speichern von Cookies auf Endgeraeten erforderlich?", in)
|
||||||
|
if out[0].RegulationShort != "TDDDG" {
|
||||||
|
t.Fatalf("§25 TDDDG must win a cookie question (lex specialis topic), got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("a general consent question still resolves to DSGVO, not §25 TDDDG", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 25 TDDDG", "TDDDG", "DE", 0.70), // higher semantic but no cookie topic
|
||||||
|
bindingRes("Art. 7 DSGVO", "DSGVO", "EU", 0.66),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Welche Anforderungen gelten an eine wirksame Einwilligung?", in)
|
||||||
|
if out[0].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("a general consent question must resolve to DSGVO (TDDDG demoted), got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
|
||||||
|
in := []LegalSearchResult{
|
||||||
|
bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
|
||||||
|
bindingRes("Art. 37 DSGVO", "DSGVO", "EU", 0.64),
|
||||||
|
}
|
||||||
|
out := rerankByAuthority("Ab wann muss ein Datenschutzbeauftragter benannt werden?", in)
|
||||||
|
// DSB topic → §38 is co-primary (topic-matched, NOT subsidiarity-demoted) and keeps its
|
||||||
|
// semantic lead; Art. 37 stays a close second. Both remain top-2.
|
||||||
|
if out[0].RegulationShort != "BDSG" {
|
||||||
|
t.Fatalf("BDSG §38 (DSB co-primary) must stay top, got %q", out[0].ArticleLabel)
|
||||||
|
}
|
||||||
|
if out[1].RegulationShort != "DSGVO" {
|
||||||
|
t.Fatalf("Art. 37 DSGVO must stay co-primary second, got %q", out[1].ArticleLabel)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("nothing is dropped and topic amplifies", func(t *testing.T) {
|
t.Run("nothing is dropped and topic amplifies", func(t *testing.T) {
|
||||||
in := []LegalSearchResult{
|
in := []LegalSearchResult{
|
||||||
guidanceRes("ENISA", "ENISA", 0.72),
|
guidanceRes("ENISA", "ENISA", 0.72),
|
||||||
|
|||||||
@@ -0,0 +1,89 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
// graphCallerRel anchors rel at the directory of THIS source file, using the file path the
// compiler recorded at build time. That makes the graph data findable under `go test`
// independent of the working directory. In a built container the source tree is gone, so
// the cwd-relative candidates are the ones that actually resolve there.
//
// It returns "" when the runtime cannot report the caller's file.
func graphCallerRel(rel string) string {
	if _, self, _, ok := runtime.Caller(0); ok {
		return filepath.Join(filepath.Dir(self), rel)
	}
	return ""
}
|
||||||
|
|
||||||
|
// firstExisting walks candidates in order and returns the first path that can be stat'ed
// and is of the requested kind: a directory when wantDir is true, a non-directory
// otherwise. Empty candidates (e.g. unset env overrides) are ignored. It returns "" when
// nothing qualifies.
func firstExisting(candidates []string, wantDir bool) string {
	for _, candidate := range candidates {
		if candidate == "" {
			continue
		}
		if fi, err := os.Stat(candidate); err == nil && fi.IsDir() == wantDir {
			return candidate
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// LoadComplianceGraph loads the file-backed Compliance Execution Graph: the Registry join-key
|
||||||
|
// contract (obligations/obligation_join_keys.json — owned by the Obligation session) + our
|
||||||
|
// curated, accepted control mappings + evidence requirements. Locations are resolved across
|
||||||
|
// three layouts: dev (cwd = ai-compliance-sdk/, canonical contract at ../obligations), container
|
||||||
|
// (WORKDIR /app, data/ copied in incl. a synced data/obligations/ copy) and `go test`
|
||||||
|
// (cwd = package dir, via graphCallerRel). Fail-closed: a missing/invalid source returns an
|
||||||
|
// error so the handler serves 503 — never a half-built graph.
|
||||||
|
//
|
||||||
|
// NOTE: data/obligations/obligation_join_keys.json is a SYNCED COPY of the repo-root contract
|
||||||
|
// (the canonical owner is the Obligation session). Re-sync it when the Registry grows; dev/test
|
||||||
|
// prefer the canonical repo-root path, only the container falls back to the copy.
|
||||||
|
func LoadComplianceGraph() (*ObligationJoinKeys, *ControlMappingSet, *EvidenceRequirementSet, error) {
|
||||||
|
joinPath := firstExisting([]string{
|
||||||
|
os.Getenv("BP_OBLIGATION_JOIN_KEYS"),
|
||||||
|
"../obligations/obligation_join_keys.json",
|
||||||
|
graphCallerRel("../../../obligations/obligation_join_keys.json"),
|
||||||
|
"data/obligations/obligation_join_keys.json",
|
||||||
|
graphCallerRel("../../data/obligations/obligation_join_keys.json"),
|
||||||
|
}, false)
|
||||||
|
if joinPath == "" {
|
||||||
|
return nil, nil, nil, fmt.Errorf("obligation_join_keys.json not found in any candidate path")
|
||||||
|
}
|
||||||
|
mapDir := firstExisting([]string{
|
||||||
|
os.Getenv("BP_CONTROL_MAPPINGS_DIR"),
|
||||||
|
"data/control_mappings",
|
||||||
|
graphCallerRel("../../data/control_mappings"),
|
||||||
|
}, true)
|
||||||
|
if mapDir == "" {
|
||||||
|
return nil, nil, nil, fmt.Errorf("control_mappings dir not found in any candidate path")
|
||||||
|
}
|
||||||
|
evDir := firstExisting([]string{
|
||||||
|
os.Getenv("BP_EVIDENCE_DIR"),
|
||||||
|
"data/evidence_requirements",
|
||||||
|
graphCallerRel("../../data/evidence_requirements"),
|
||||||
|
}, true)
|
||||||
|
if evDir == "" {
|
||||||
|
return nil, nil, nil, fmt.Errorf("evidence_requirements dir not found in any candidate path")
|
||||||
|
}
|
||||||
|
|
||||||
|
joins, err := LoadObligationJoinKeys(joinPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, nil, fmt.Errorf("load join keys (%s): %w", joinPath, err)
|
||||||
|
}
|
||||||
|
mappings, err := LoadControlMappings(mapDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, nil, fmt.Errorf("load control mappings (%s): %w", mapDir, err)
|
||||||
|
}
|
||||||
|
evidence, err := LoadEvidenceRequirements(evDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, nil, fmt.Errorf("load evidence (%s): %w", evDir, err)
|
||||||
|
}
|
||||||
|
return joins, mappings, evidence, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
// ObligationStatus is the Advisor's vertical slice over the compliance graph for ONE legal
// obligation: which accepted controls satisfy it, what evidence they require, what's missing,
// and the resulting status. The point is "the required evidence is (not) present", not "a
// document exists". citation_spans is pending until the Legal-Knowledge-Graph session attaches
// them to the obligation (the upper half of the bridge).
//
// Values are produced by AssessObligationStatus.
type ObligationStatus struct {
	// ObligationID echoes the obligation_id that was assessed (also for unknown ids).
	ObligationID string `json:"obligation_id"`
	// LegalBasis is taken from the registry entry's CitationUnits.
	LegalBasis []string `json:"legal_basis"` // the obligation's citation_units
	// Status is one of:
	//   erfuellt — every reached control has all of its required evidence
	//   offen    — at least one reached control is missing required evidence
	//   unklar   — unknown obligation_id, or no accepted control reaches the obligation
	Status string `json:"status"` // erfuellt | offen | unklar
	// Controls holds one entry per accepted control that reaches the obligation.
	Controls []ObligationControlStatus `json:"controls"`
	// CitationSpans is a placeholder string for now; the assessment always sets "pending".
	CitationSpans string `json:"citation_spans"` // "pending" until the registry fills them
}
|
||||||
|
|
||||||
|
// ObligationControlStatus is one control under an obligation with its evidence picture.
type ObligationControlStatus struct {
	// Framework, Control and MappingType are copied from the accepted control mapping
	// (its TargetFramework, TargetControl and MappingType).
	Framework   string `json:"framework"`
	Control     string `json:"control"`
	MappingType string `json:"mapping_type"`
	// RequiredEvidence is everything the evidence set requires for this framework/control.
	RequiredEvidence []EvidenceRequirement `json:"required_evidence"`
	// MissingEvidence is the subset of RequiredEvidence not reported as collected; with no
	// evidence lookup supplied it equals RequiredEvidence.
	MissingEvidence []EvidenceRequirement `json:"missing_evidence"`
}
|
||||||
|
|
||||||
|
// AssessObligationStatus traverses obligation_id -> (citation_unit) -> accepted Controls ->
|
||||||
|
// required Evidence -> Status. hasEvidence reports whether a given (framework, control,
|
||||||
|
// evidence_type) is already collected; pass nil in the MVP (no collection yet) -> everything
|
||||||
|
// required is missing and the status is "offen". Unknown or unmapped obligation -> "unklar".
|
||||||
|
func AssessObligationStatus(joins *ObligationJoinKeys, mappings *ControlMappingSet, evidence *EvidenceRequirementSet, obligationID string, hasEvidence func(framework, control, evidenceType string) bool) ObligationStatus {
|
||||||
|
ob := joins.FindObligation(obligationID)
|
||||||
|
if ob == nil {
|
||||||
|
return ObligationStatus{ObligationID: obligationID, Status: "unklar", CitationSpans: "pending"}
|
||||||
|
}
|
||||||
|
st := ObligationStatus{
|
||||||
|
ObligationID: obligationID,
|
||||||
|
LegalBasis: ob.CitationUnits,
|
||||||
|
CitationSpans: "pending",
|
||||||
|
Controls: []ObligationControlStatus{},
|
||||||
|
}
|
||||||
|
ctrls := AcceptedControlsForObligation(*ob, mappings)
|
||||||
|
if len(ctrls) == 0 {
|
||||||
|
st.Status = "unklar" // no accepted control reaches it — we cannot assess
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
anyMissing := false
|
||||||
|
for _, m := range ctrls {
|
||||||
|
req := evidence.RequiredFor(m.TargetFramework, m.TargetControl)
|
||||||
|
missing := make([]EvidenceRequirement, 0, len(req))
|
||||||
|
for _, e := range req {
|
||||||
|
if hasEvidence == nil || !hasEvidence(e.Framework, e.Control, e.EvidenceType) {
|
||||||
|
missing = append(missing, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(missing) > 0 {
|
||||||
|
anyMissing = true
|
||||||
|
}
|
||||||
|
st.Controls = append(st.Controls, ObligationControlStatus{
|
||||||
|
Framework: m.TargetFramework,
|
||||||
|
Control: m.TargetControl,
|
||||||
|
MappingType: m.MappingType,
|
||||||
|
RequiredEvidence: req,
|
||||||
|
MissingEvidence: missing,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if anyMissing {
|
||||||
|
st.Status = "offen"
|
||||||
|
} else {
|
||||||
|
st.Status = "erfuellt"
|
||||||
|
}
|
||||||
|
return st
|
||||||
|
}
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func loadGraph(t *testing.T) (*ObligationJoinKeys, *ControlMappingSet, *EvidenceRequirementSet) {
|
||||||
|
t.Helper()
|
||||||
|
joins, err := LoadObligationJoinKeys("../../../obligations/obligation_join_keys.json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("join keys: %v", err)
|
||||||
|
}
|
||||||
|
maps, err := LoadControlMappings("../../data/control_mappings")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("mappings: %v", err)
|
||||||
|
}
|
||||||
|
ev, err := LoadEvidenceRequirements("../../data/evidence_requirements")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("evidence: %v", err)
|
||||||
|
}
|
||||||
|
return joins, maps, ev
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAssessObligationStatus(t *testing.T) {
|
||||||
|
joins, maps, ev := loadGraph(t)
|
||||||
|
|
||||||
|
// covered obligation, no evidence collected yet (MVP) -> offen
|
||||||
|
st := AssessObligationStatus(joins, maps, ev, "user_authentication_required", nil)
|
||||||
|
if st.Status != "offen" {
|
||||||
|
t.Errorf("want offen, got %q", st.Status)
|
||||||
|
}
|
||||||
|
if len(st.Controls) == 0 {
|
||||||
|
t.Fatal("expected controls for a covered obligation")
|
||||||
|
}
|
||||||
|
for _, c := range st.Controls {
|
||||||
|
if len(c.MissingEvidence) != len(c.RequiredEvidence) {
|
||||||
|
t.Error("MVP: all required evidence should be missing")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Logf("DURCHSTICH user_authentication_required: status=%s legal_basis=%v citation_spans=%s",
|
||||||
|
st.Status, st.LegalBasis, st.CitationSpans)
|
||||||
|
for _, c := range st.Controls {
|
||||||
|
t.Logf(" %s %s (%s): %d required evidence, %d missing", c.Framework, c.Control, c.MappingType, len(c.RequiredEvidence), len(c.MissingEvidence))
|
||||||
|
}
|
||||||
|
|
||||||
|
// all evidence present -> erfuellt
|
||||||
|
st2 := AssessObligationStatus(joins, maps, ev, "user_authentication_required", func(f, c, et string) bool { return true })
|
||||||
|
if st2.Status != "erfuellt" {
|
||||||
|
t.Errorf("want erfuellt with all evidence present, got %q", st2.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// uncovered obligation (no accepted control reaches it) -> unklar
|
||||||
|
if st3 := AssessObligationStatus(joins, maps, ev, "sbom_creation", nil); st3.Status != "unklar" {
|
||||||
|
t.Errorf("uncovered sbom_creation: want unklar, got %q", st3.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// unknown obligation_id -> unklar
|
||||||
|
if st4 := AssessObligationStatus(joins, maps, ev, "does_not_exist", nil); st4.Status != "unklar" {
|
||||||
|
t.Errorf("unknown obligation: want unklar, got %q", st4.Status)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -19,13 +19,14 @@ import (
|
|||||||
// professional statement, not an AI guess. The retriever's score lives only in the rationale
|
// professional statement, not an AI guess. The retriever's score lives only in the rationale
|
||||||
// of a candidate, never as structured truth.
|
// of a candidate, never as structured truth.
|
||||||
type ControlMapping struct {
|
type ControlMapping struct {
|
||||||
SourceNorm string `json:"source_norm"` // e.g. "CRA Annex I Part I (2)(c)"
|
SourceNorm string `json:"source_norm"` // e.g. "CRA Annex I Part I (2)(c)"
|
||||||
SourceRole string `json:"source_role"` // source_role of the norm (operational_requirement, ...)
|
SourceRole string `json:"source_role"` // source_role of the norm (operational_requirement, ...)
|
||||||
TargetFramework string `json:"target_framework"` // e.g. "OWASP ASVS"
|
TargetFramework string `json:"target_framework"` // e.g. "OWASP ASVS"
|
||||||
TargetControl string `json:"target_control"` // e.g. "V6.3.1"
|
TargetControl string `json:"target_control"` // e.g. "V6.3.1"
|
||||||
MappingType string `json:"mapping_type"` // supports | partially_supports | implements | related | contradicts
|
MappingType string `json:"mapping_type"` // primary_implementation | implements | supports | partially_supports | related | contradicts
|
||||||
MappingStatus string `json:"mapping_status"` // candidate | accepted | rejected | superseded
|
MappingStatus string `json:"mapping_status"` // candidate | accepted | rejected | superseded
|
||||||
Provenance string `json:"provenance"` // retriever_candidate | human_curated | rule_based
|
Provenance string `json:"provenance"` // retriever_candidate | human_curated | rule_based
|
||||||
|
ObligationID string `json:"obligation_id,omitempty"` // stable cross-session join key (Obligation Registry); empty until adopted, citation_unit is the interim bridge
|
||||||
Rationale string `json:"rationale"`
|
Rationale string `json:"rationale"`
|
||||||
ReviewedBy string `json:"reviewed_by,omitempty"` // who decided (human or rule id)
|
ReviewedBy string `json:"reviewed_by,omitempty"` // who decided (human or rule id)
|
||||||
ReviewDate string `json:"review_date,omitempty"` // YYYY-MM-DD
|
ReviewDate string `json:"review_date,omitempty"` // YYYY-MM-DD
|
||||||
@@ -35,7 +36,7 @@ type ControlMapping struct {
|
|||||||
|
|
||||||
// Allowed enum values — the deterministic "rule" layer that keeps the curated store clean.
|
// Allowed enum values — the deterministic "rule" layer that keeps the curated store clean.
|
||||||
var (
|
var (
|
||||||
mappingTypeValues = map[string]bool{"supports": true, "partially_supports": true, "implements": true, "related": true, "contradicts": true}
|
mappingTypeValues = map[string]bool{"primary_implementation": true, "implements": true, "supports": true, "partially_supports": true, "related": true, "contradicts": true}
|
||||||
mappingStatusValues = map[string]bool{"candidate": true, "accepted": true, "rejected": true, "superseded": true}
|
mappingStatusValues = map[string]bool{"candidate": true, "accepted": true, "rejected": true, "superseded": true}
|
||||||
provenanceValues = map[string]bool{"retriever_candidate": true, "human_curated": true, "rule_based": true}
|
provenanceValues = map[string]bool{"retriever_candidate": true, "human_curated": true, "rule_based": true}
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,172 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ObligationKey is one entry of the Obligation Registry's cross-session contract
// (obligations/obligation_join_keys.json). obligation_id is the STABLE join key — assigned
// only by the Registry, never minted here. citation_units are the interim bridge until our
// ControlMapping adopts obligation_id directly.
type ObligationKey struct {
	// ObligationID is the stable join key; lookups and exact mapping joins compare on it.
	ObligationID string `json:"obligation_id"`
	Regulation   string `json:"regulation"`
	// Family is carried through into the coverage report rows.
	Family string `json:"family"`
	Tier   string `json:"tier"`
	// CitationUnits are normalized via citationUnitKey for the interim join and are also
	// surfaced as the obligation's legal basis.
	CitationUnits []string `json:"citation_units"`
	SourceRole    string   `json:"source_role"`
}
|
||||||
|
|
||||||
|
// ObligationJoinKeys is the loaded contract + a citation-unit index for the interim join.
type ObligationJoinKeys struct {
	SchemaVersion string `json:"schema_version"`
	// Count is the value declared in the contract file; the loader does not check it
	// against len(ObligationIDs).
	Count int `json:"count"`
	// ObligationIDs holds the registry entries (full ObligationKey records, not bare ids).
	ObligationIDs []ObligationKey `json:"obligation_ids"`
	// byCitationKey maps a normalized citation-unit key (see citationUnitKey) to the
	// obligation_ids citing it. Unexported, so it is not part of the JSON; it is built by
	// LoadObligationJoinKeys after decoding.
	byCitationKey map[string][]string
}
|
||||||
|
|
||||||
|
// citationRefRe captures each parenthesized alphanumeric reference, e.g. "(2)" or "(c)".
var citationRefRe = regexp.MustCompile(`\(([0-9a-zA-Z]+)\)`)

// citationUnitKey reduces a CRA Annex I reference to a normalized key for the INTERIM
// citation_unit join, so that our "CRA Annex I Part I (2)(c)" and the Registry's
// "Annex I (2)(c)" both become "i:2.c". The key has the form "<part>:<ref>.<ref>...":
//   - part is "ii" when the text mentions "part ii"; "i" when it mentions "part i" or
//     contains "(2)" (CRA Annex I Part I = the (2)(x) essential requirements); else empty.
//   - refs are all parenthesized alphanumeric groups in order of appearance, lowercased.
//
// Interim only — superseded by the stable obligation_id once adopted.
func citationUnitKey(cu string) string {
	lowered := strings.ToLower(cu)

	// "part ii" must be tested first: it also contains "part i".
	var part string
	if strings.Contains(lowered, "part ii") {
		part = "ii"
	} else if strings.Contains(lowered, "part i") || strings.Contains(lowered, "(2)") {
		part = "i"
	}

	var key strings.Builder
	key.WriteString(part)
	key.WriteByte(':')
	for i, group := range citationRefRe.FindAllStringSubmatch(cu, -1) {
		if i > 0 {
			key.WriteByte('.')
		}
		key.WriteString(strings.ToLower(group[1]))
	}
	return key.String()
}
|
||||||
|
|
||||||
|
// LoadObligationJoinKeys reads the Registry contract and indexes it by citation-unit key.
|
||||||
|
func LoadObligationJoinKeys(path string) (*ObligationJoinKeys, error) {
|
||||||
|
raw, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var o ObligationJoinKeys
|
||||||
|
if err := json.Unmarshal(raw, &o); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
o.byCitationKey = map[string][]string{}
|
||||||
|
for _, ob := range o.ObligationIDs {
|
||||||
|
for _, cu := range ob.CitationUnits {
|
||||||
|
k := citationUnitKey(cu)
|
||||||
|
o.byCitationKey[k] = append(o.byCitationKey[k], ob.ObligationID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &o, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ObligationsForCitation returns the obligation_ids that join (interim) to a citation
|
||||||
|
// reference such as a control_mapping.source_norm.
|
||||||
|
func (o *ObligationJoinKeys) ObligationsForCitation(citationRef string) []string {
|
||||||
|
return o.byCitationKey[citationUnitKey(citationRef)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindObligation returns the registry entry for an obligation_id (nil if unknown).
|
||||||
|
func (o *ObligationJoinKeys) FindObligation(obligationID string) *ObligationKey {
|
||||||
|
for i := range o.ObligationIDs {
|
||||||
|
if o.ObligationIDs[i].ObligationID == obligationID {
|
||||||
|
return &o.ObligationIDs[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// mappingReaches reports whether a control mapping reaches an obligation — EXACT via the
|
||||||
|
// adopted obligation_id (semantic, preferred), else via the interim citation_unit join (for
|
||||||
|
// not-yet-adopted rows). Once obligation_id is set, the coarse citation_unit match is ignored:
|
||||||
|
// that is how the semantic join replaces the structural one (e.g. V11.2.1 crypto no longer
|
||||||
|
// rides (2)(d) into user_authentication_required — it goes to credential_confidentiality_protection).
|
||||||
|
func mappingReaches(m ControlMapping, ob ObligationKey, citationKeys map[string]bool) bool {
|
||||||
|
if m.ObligationID != "" {
|
||||||
|
return m.ObligationID == ob.ObligationID
|
||||||
|
}
|
||||||
|
return citationKeys[citationUnitKey(m.SourceNorm)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// AcceptedControlsForObligation returns our accepted control mappings that reach an obligation
|
||||||
|
// (deduped by target control), obligation_id-exact where adopted, citation_unit otherwise.
|
||||||
|
func AcceptedControlsForObligation(ob ObligationKey, mappings *ControlMappingSet) []ControlMapping {
|
||||||
|
keys := make(map[string]bool, len(ob.CitationUnits))
|
||||||
|
for _, cu := range ob.CitationUnits {
|
||||||
|
keys[citationUnitKey(cu)] = true
|
||||||
|
}
|
||||||
|
out := []ControlMapping{}
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for _, m := range mappings.All {
|
||||||
|
if !m.IsAccepted() || !mappingReaches(m, ob, keys) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ck := m.TargetFramework + ":" + m.TargetControl
|
||||||
|
if seen[ck] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[ck] = true
|
||||||
|
out = append(out, m)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ObligationCoverage is one row of the cross-session coverage report.
type ObligationCoverage struct {
	ObligationID string `json:"obligation_id"`
	Family       string `json:"family"`
	// Status is one of:
	//   covered         — at least one accepted control reaches the obligation
	//   mapped_rejected — no accepted control, but a rejected mapping reaches it
	//   uncovered       — neither
	Status string `json:"status"` // covered | mapped_rejected | uncovered
	// AcceptedControls lists the reaching accepted controls as "<framework>:<control>",
	// deduplicated; it stays nil when there are none.
	AcceptedControls []string `json:"accepted_controls"`
	// EvidenceCount sums the number of evidence requirements over AcceptedControls.
	EvidenceCount int `json:"evidence_count"`
}
|
||||||
|
|
||||||
|
// ComputeObligationCoverage joins the Registry obligations to our control mappings — exact via
|
||||||
|
// obligation_id where adopted, else via the interim citation_unit join — and reports per
|
||||||
|
// obligation: covered (>=1 accepted control reaches it), mapped_rejected (only rejected
|
||||||
|
// mappings reach it), or uncovered. The signal back to the Obligation session.
|
||||||
|
func ComputeObligationCoverage(joins *ObligationJoinKeys, mappings *ControlMappingSet, evidence *EvidenceRequirementSet) []ObligationCoverage {
|
||||||
|
out := make([]ObligationCoverage, 0, len(joins.ObligationIDs))
|
||||||
|
for _, ob := range joins.ObligationIDs {
|
||||||
|
keys := make(map[string]bool, len(ob.CitationUnits))
|
||||||
|
for _, cu := range ob.CitationUnits {
|
||||||
|
keys[citationUnitKey(cu)] = true
|
||||||
|
}
|
||||||
|
cov := ObligationCoverage{ObligationID: ob.ObligationID, Family: ob.Family}
|
||||||
|
seen := map[string]bool{}
|
||||||
|
rejected := false
|
||||||
|
for _, m := range mappings.All {
|
||||||
|
if !mappingReaches(m, ob, keys) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m.IsAccepted() {
|
||||||
|
ck := m.TargetFramework + ":" + m.TargetControl
|
||||||
|
if !seen[ck] {
|
||||||
|
seen[ck] = true
|
||||||
|
cov.AcceptedControls = append(cov.AcceptedControls, ck)
|
||||||
|
cov.EvidenceCount += len(evidence.RequiredFor(m.TargetFramework, m.TargetControl))
|
||||||
|
}
|
||||||
|
} else if m.MappingStatus == "rejected" {
|
||||||
|
rejected = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case len(cov.AcceptedControls) > 0:
|
||||||
|
cov.Status = "covered"
|
||||||
|
case rejected:
|
||||||
|
cov.Status = "mapped_rejected"
|
||||||
|
default:
|
||||||
|
cov.Status = "uncovered"
|
||||||
|
}
|
||||||
|
out = append(out, cov)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestCitationUnitKey_Join(t *testing.T) {
|
||||||
|
// our source_norm and the registry citation_unit must collapse to the SAME key.
|
||||||
|
if citationUnitKey("CRA Annex I Part I (2)(c) — Schutz vor unbefugtem Zugriff") != citationUnitKey("Annex I (2)(c)") {
|
||||||
|
t.Errorf("interim join broken: %q vs %q",
|
||||||
|
citationUnitKey("CRA Annex I Part I (2)(c)"), citationUnitKey("Annex I (2)(c)"))
|
||||||
|
}
|
||||||
|
// Part II must NOT collide with Part I.
|
||||||
|
if citationUnitKey("Annex I Part II (1)") == citationUnitKey("CRA Annex I Part I (2)(c)") {
|
||||||
|
t.Error("Part II must not join to Part I")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadObligationJoinKeys(t *testing.T) {
|
||||||
|
o, err := LoadObligationJoinKeys("../../../obligations/obligation_join_keys.json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load: %v", err)
|
||||||
|
}
|
||||||
|
if o.Count != len(o.ObligationIDs) {
|
||||||
|
t.Errorf("count %d != len %d", o.Count, len(o.ObligationIDs))
|
||||||
|
}
|
||||||
|
if len(o.ObligationIDs) == 0 {
|
||||||
|
t.Fatal("empty contract")
|
||||||
|
}
|
||||||
|
if got := o.ObligationsForCitation("CRA Annex I Part I (2)(c)"); len(got) == 0 {
|
||||||
|
t.Error("expected an obligation joined to (2)(c)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestObligationCoverage_Report(t *testing.T) {
|
||||||
|
joins, err := LoadObligationJoinKeys("../../../obligations/obligation_join_keys.json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("join keys: %v", err)
|
||||||
|
}
|
||||||
|
maps, err := LoadControlMappings("../../data/control_mappings")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("mappings: %v", err)
|
||||||
|
}
|
||||||
|
ev, err := LoadEvidenceRequirements("../../data/evidence_requirements")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("evidence: %v", err)
|
||||||
|
}
|
||||||
|
cov := ComputeObligationCoverage(joins, maps, ev)
|
||||||
|
if len(cov) == 0 {
|
||||||
|
t.Fatal("no coverage computed")
|
||||||
|
}
|
||||||
|
byStatus := map[string]int{}
|
||||||
|
for _, c := range cov {
|
||||||
|
byStatus[c.Status]++
|
||||||
|
}
|
||||||
|
t.Logf("COVERAGE: %d Obligations | covered=%d mapped_rejected=%d uncovered=%d",
|
||||||
|
len(cov), byStatus["covered"], byStatus["mapped_rejected"], byStatus["uncovered"])
|
||||||
|
for _, c := range cov {
|
||||||
|
if c.Status != "uncovered" {
|
||||||
|
t.Logf(" %-15s %-36s controls=%v evidence=%d", c.Status, c.ObligationID, c.AcceptedControls, c.EvidenceCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -77,6 +77,8 @@ _ROUTER_MODULES = [
|
|||||||
"licenses_routes",
|
"licenses_routes",
|
||||||
"template_rule_routes",
|
"template_rule_routes",
|
||||||
"specialist_agent_routes",
|
"specialist_agent_routes",
|
||||||
|
"reasoning_routes",
|
||||||
|
"onboarding_routes",
|
||||||
]
|
]
|
||||||
|
|
||||||
_loaded_count = 0
|
_loaded_count = 0
|
||||||
|
|||||||
@@ -0,0 +1,74 @@
|
|||||||
|
"""Onboarding Advisor endpoint — exposes the existing Smart Onboarding Advisor at runtime.
|
||||||
|
|
||||||
|
This adds NO new reasoning logic. It exposes the already-built, tested orchestration (Signal Producers
|
||||||
|
-> Normalizer -> Silent Knowledge Pass -> Advisor) through one runtime endpoint. No DB, no persistence.
|
||||||
|
|
||||||
|
POST /onboarding/advisor-start — (company + certs + target + scanner findings) -> advisory payload
|
||||||
|
GET /onboarding/targets — the supported target ids
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.onboarding import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
InferredAssumption,
|
||||||
|
ProducedSignal,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
from compliance.services.onboarding_service import run_advisor, supported_targets
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
router = APIRouter(prefix="/onboarding", tags=["onboarding"])
|
||||||
|
|
||||||
|
|
||||||
|
class OnboardingAdvisorRequest(BaseModel):
    # Request body for POST /onboarding/advisor-start.
    # Every field carries a default, so a partially filled body is accepted.

    # -- who the company is -------------------------------------------------
    company: str = ""
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)

    # -- what the company already holds -------------------------------------
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)

    # Target id; the endpoint rejects values not in supported_targets().
    target: str = "CRA"

    # Signals that upstream scanner adapters have already produced.
    scanner_findings: List[ProducedSignal] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorResponse(BaseModel):
    # Response payload of POST /onboarding/advisor-start.
    # All fields default to an empty string / empty list.

    # -- summary texts ------------------------------------------------------
    silent_intake_summary: str = ""
    headline: str = ""

    # -- what was detected or assumed ---------------------------------------
    auto_detected: List[str] = Field(default_factory=list)
    # Partial signal: raises strength, but the question is still asked.
    indications: List[str] = Field(default_factory=list)
    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)

    # -- what to do next ----------------------------------------------------
    top_5_questions: List[AdvisorQuestion] = Field(default_factory=list)
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/targets")
def list_targets() -> dict:
    # Wrap whatever the onboarding service reports under a single "targets" key.
    available = supported_targets()
    return {"targets": available}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/advisor-start", response_model=AdvisorResponse)
def advisor_start_endpoint(req: OnboardingAdvisorRequest) -> AdvisorResponse:
    # Guard clause: an unknown target is answered with 404, and the detail
    # message lists the targets that are supported.
    if req.target not in supported_targets():
        detail = "unsupported target '%s'; supported: %s" % (req.target, supported_targets())
        raise HTTPException(status_code=404, detail=detail)

    # The service returns a pair: the advisor result and the silent-intake summary.
    result, si_summary = run_advisor(
        company=req.company,
        certifications=req.certifications,
        target=req.target,
        signals=req.scanner_findings,
        known_evidence=req.known_evidence,
        products=req.products,
        markets=req.markets,
        industry=req.industry or "",  # the service receives "" instead of None
    )

    # Map the result onto the response model. Note the one rename:
    # result.next_best_questions is exposed as top_5_questions.
    payload = {
        "silent_intake_summary": si_summary,
        "headline": result.headline,
        "auto_detected": result.auto_detected,
        "indications": result.indications,
        "inferred_assumptions": result.inferred_assumptions,
        "rejected_assumptions": result.rejected_assumptions,
        "top_5_questions": result.next_best_questions,
        "capability_delta": result.capability_delta,
        "top_measures": result.top_measures,
        "evidence_requests": result.evidence_requests,
        "unsupported_domains": result.unsupported_domains,
        "completeness_summary": result.completeness_summary,
    }
    return AdvisorResponse(**payload)
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
"""HTTP endpoints for the Regulatory Reasoning Engine (spec §7).
|
||||||
|
|
||||||
|
Thin handlers — all reasoning lives in `compliance.reasoning.*`. No DB, no RAG;
|
||||||
|
pure deterministic rule evaluation.
|
||||||
|
|
||||||
|
POST /reasoning/scope -> which regulations apply + missing facts
|
||||||
|
POST /reasoning/obligations -> obligations, overlaps, multi-evidence
|
||||||
|
POST /reasoning/implementation-reasoning -> claim->obligation mapping (Welt 1, no verdict)
|
||||||
|
POST /reasoning/interpretation-assessment -> verdict on a customer interpretation
|
||||||
|
POST /reasoning/product-scope -> gate on facts, else run discover_scope once
|
||||||
|
POST /reasoning/regulatory-map -> customer-readable read-model over the scope
|
||||||
|
POST /reasoning/interpretation-in-map -> judge a customer interpretation within the map
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from compliance.interpretation_map import (
|
||||||
|
InterpretationInMapRequest,
|
||||||
|
InterpretationInMapResult,
|
||||||
|
interpret_in_map,
|
||||||
|
)
|
||||||
|
from compliance.product_scope import (
|
||||||
|
ProductScopeRequest,
|
||||||
|
ProductScopeResponse,
|
||||||
|
resolve_product_scope,
|
||||||
|
)
|
||||||
|
from compliance.regulatory_map import RegulatoryMap, RegulatoryMapRequest, render_regulatory_map
|
||||||
|
from compliance.reasoning import (
|
||||||
|
assess_interpretation,
|
||||||
|
derive_obligations,
|
||||||
|
discover_scope,
|
||||||
|
reason_implementation_claim,
|
||||||
|
)
|
||||||
|
from compliance.reasoning.schemas import (
|
||||||
|
ImplementationReasoningRequest,
|
||||||
|
ImplementationReasoningResponse,
|
||||||
|
InterpretationRequest,
|
||||||
|
InterpretationResponse,
|
||||||
|
ObligationsRequest,
|
||||||
|
ObligationsResponse,
|
||||||
|
ScopeRequest,
|
||||||
|
ScopeResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/reasoning", tags=["reasoning"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scope", response_model=ScopeResponse)
def scope_discovery(req: ScopeRequest) -> ScopeResponse:
    # Discover the scope once, then lift its missing facts and confidence to
    # the top level of the response next to the full scope object.
    discovered = discover_scope(req.product_profile)
    response_fields = {
        "regulatory_scope": discovered,
        "missing_facts": discovered.missing_facts,
        "confidence": discovered.confidence,
    }
    return ScopeResponse(**response_fields)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/obligations", response_model=ObligationsResponse)
def applicable_obligations(req: ObligationsRequest) -> ObligationsResponse:
    # Thin handler: hand profile and scope to the reasoning layer unchanged.
    profile, scope = req.product_profile, req.regulatory_scope
    return derive_obligations(profile, scope)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/implementation-reasoning", response_model=ImplementationReasoningResponse)
def implementation_reasoning(req: ImplementationReasoningRequest) -> ImplementationReasoningResponse:
    # Thin handler: hand profile and customer claim to the reasoning layer.
    profile, claim = req.product_profile, req.customer_claim
    return reason_implementation_claim(profile, claim)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/product-scope", response_model=ProductScopeResponse)
def product_scope(req: ProductScopeRequest) -> ProductScopeResponse:
    # Thin handler: the product-scope module does all the work.
    outcome = resolve_product_scope(req.product_profile)
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/regulatory-map", response_model=RegulatoryMap)
def regulatory_map(req: RegulatoryMapRequest) -> RegulatoryMap:
    # Thin handler: render the map for the submitted product profile.
    rendered = render_regulatory_map(req.product_profile)
    return rendered
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/interpretation-in-map", response_model=InterpretationInMapResult)
def interpretation_in_map(req: InterpretationInMapRequest) -> InterpretationInMapResult:
    # Two steps: render the map for the profile, then judge the customer's
    # interpretation against that freshly rendered map.
    return interpret_in_map(
        render_regulatory_map(req.product_profile),
        req.customer_interpretation,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/interpretation-assessment", response_model=InterpretationResponse)
def interpretation_assessment(req: InterpretationRequest) -> InterpretationResponse:
    # Assess once, then copy the listed attributes one-to-one onto the response.
    verdict = assess_interpretation(req.customer_interpretation, req.product_profile)
    copied_fields = (
        "assessment",
        "affected_regulations",
        "affected_obligations",
        "corrected_interpretation",
        "risks",
        "legal_basis_refs",
        "explanation",
        "confidence",
    )
    return InterpretationResponse(**{field: getattr(verdict, field) for field in copied_fields})
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
"""Master Capability Registry v0 (Phase 2C) — Compliance Execution domain.
|
||||||
|
|
||||||
|
Registry + minting layer for Master Capabilities — the third instance of the
|
||||||
|
identity-machine pattern (Master Controls, Master Obligations, Master Capabilities).
|
||||||
|
|
||||||
|
STORED: identities, sources, relationship types, policy versions, lifecycle events,
|
||||||
|
provenance. DERIVED (never stored): confidence, coverage, gap.
|
||||||
|
|
||||||
|
v0 scope: types + minting + typed relations + versioned policy + identity lifecycle.
|
||||||
|
NOT here: Company-Gap, real ISO/cert mappings, certification derivations, UI, RAG,
|
||||||
|
new meta-model class, generic canonicalization engine.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import (
|
||||||
|
CapabilityRegistry,
|
||||||
|
deprecate_capability,
|
||||||
|
evaluate_relation,
|
||||||
|
merge_capabilities,
|
||||||
|
mint_capability,
|
||||||
|
resolve,
|
||||||
|
split_capability,
|
||||||
|
)
|
||||||
|
from .policy import DEFAULT_POLICY, assert_no_certification_confirms
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
CapabilityCandidate,
|
||||||
|
CapabilityRelation,
|
||||||
|
Confidence,
|
||||||
|
DerivedAssessment,
|
||||||
|
EvidenceKind,
|
||||||
|
IdentityLifecycleEvent,
|
||||||
|
LifecycleEventType,
|
||||||
|
LifecycleState,
|
||||||
|
MasterCapability,
|
||||||
|
PolicyRule,
|
||||||
|
PolicyVersion,
|
||||||
|
Provenance,
|
||||||
|
RelationType,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public names of the package, grouped by the submodule they are imported from.
__all__ = [
    # engine
    "CapabilityRegistry", "mint_capability", "evaluate_relation", "resolve",
    "deprecate_capability", "merge_capabilities", "split_capability",
    # policy
    "DEFAULT_POLICY", "assert_no_certification_confirms",
    # schemas
    "MasterCapability", "CapabilityCandidate", "CapabilityRelation",
    "RelationType", "EvidenceKind", "AssertionStatus", "Confidence",
    "PolicyRule", "PolicyVersion",
    "IdentityLifecycleEvent", "LifecycleEventType", "LifecycleState",
    "Provenance", "DerivedAssessment",
]
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
"""Master Capability Registry v0 — minting, derivation, identity lifecycle.
|
||||||
|
|
||||||
|
STORED on the registry: identities, sources, relation types, policy versions,
|
||||||
|
lifecycle events, provenance. DERIVED (never stored): confidence/status, via
|
||||||
|
`evaluate_relation` under a versioned policy.
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .policy import DEFAULT_POLICY
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
CapabilityCandidate,
|
||||||
|
CapabilityRelation,
|
||||||
|
Confidence,
|
||||||
|
DerivedAssessment,
|
||||||
|
IdentityLifecycleEvent,
|
||||||
|
LifecycleEventType,
|
||||||
|
LifecycleState,
|
||||||
|
MasterCapability,
|
||||||
|
PolicyVersion,
|
||||||
|
Provenance,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityRegistry(BaseModel):
    # Stored state only. There is deliberately no confidence/coverage field:
    # those are DERIVED on demand and never stored.

    # capability id -> identity record
    capabilities: Dict[str, MasterCapability] = Field(default_factory=dict)
    relations: List[CapabilityRelation] = Field(default_factory=list)
    lifecycle_events: List[IdentityLifecycleEvent] = Field(default_factory=list)
    # NOTE(review): the factory hands out the module-level DEFAULT_POLICY object
    # itself, not a copy; confirm nothing mutates a registry's policy in place.
    policy: PolicyVersion = Field(default_factory=lambda: DEFAULT_POLICY)
    # Serial used for the next minted id; incremented by mint_capability.
    next_serial: int = 1
|
||||||
|
|
||||||
|
|
||||||
|
def _mcap_id(serial: int) -> str:
|
||||||
|
return "MCAP-%05d" % serial
|
||||||
|
|
||||||
|
|
||||||
|
def _next_event_id(registry: "CapabilityRegistry") -> str:
|
||||||
|
return "evt-%d" % (len(registry.lifecycle_events) + 1)
|
||||||
|
|
||||||
|
|
||||||
|
def mint_capability(
    registry: CapabilityRegistry,
    candidate: CapabilityCandidate,
    provenance: Optional[Provenance] = None,
    name: str = "",
    definition: str = "",
    category: str = "",
    domains: Optional[List[str]] = None,
) -> MasterCapability:
    """Mint a new capability identity from a candidate and store it in the registry.

    The id is built from ``registry.next_serial``, which is incremented after
    the capability has been stored.

    Args:
        registry: Registry to add the capability to (mutated in place).
        candidate: Source candidate; supplies the fallback name.
        provenance: Optional provenance; when falsy, a "system" provenance
            naming the candidate's raw term is created.
        name: Display name. Falls back to ``candidate.normalized``, then to
            ``candidate.raw_term``.
        definition: Passed through to the capability.
        category: Passed through to the capability.
        domains: Passed through; a falsy value becomes an empty list.

    Returns:
        The newly registered MasterCapability.
    """
    new_id = _mcap_id(registry.next_serial)
    display_name = name or candidate.normalized or candidate.raw_term
    domain_list = domains or []
    if not provenance:
        provenance = Provenance(
            author="system",
            basis="minted from candidate '%s'" % candidate.raw_term,
        )

    minted = MasterCapability(
        capability_id=new_id,
        name=display_name,
        definition=definition,
        category=category,
        domains=domain_list,
        provenance=provenance,
    )
    # Store first, then advance the serial; a failed construction above leaves
    # the registry untouched.
    registry.capabilities[new_id] = minted
    registry.next_serial += 1
    return minted
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_relation(
    relation: CapabilityRelation, policy: Optional[PolicyVersion] = None
) -> DerivedAssessment:
    """Look up the assessment for a relation under a versioned policy.

    The first policy rule whose relationship type and evidence kind both match
    the relation decides status and confidence. Without a match the result is
    UNKNOWN / LOW and the explanation ends in " (no rule)". Nothing is stored;
    the assessment is only returned.

    Args:
        relation: The typed relation to assess.
        policy: Policy to apply; DEFAULT_POLICY when None.

    Returns:
        A DerivedAssessment carrying status, confidence, the policy version
        used and a one-line explanation.
    """
    pol = DEFAULT_POLICY if policy is None else policy

    # First matching rule wins.
    matched = next(
        (
            candidate
            for candidate in pol.rules
            if candidate.relationship_type == relation.relationship_type
            and candidate.evidence_kind == relation.evidence_kind
        ),
        None,
    )
    if matched is None:
        status, confidence, suffix = AssertionStatus.UNKNOWN, Confidence.LOW, " (no rule)"
    else:
        status, confidence, suffix = matched.status, matched.confidence, ""

    explanation = "%s + %s under %s -> %s/%s%s" % (
        relation.relationship_type.value,
        relation.evidence_kind.value,
        pol.policy_version,
        status.value,
        confidence.value,
        suffix,
    )
    return DerivedAssessment(
        target_capability_id=relation.target_capability_id,
        status=status,
        confidence=confidence,
        policy_version=pol.policy_version,
        explanation=explanation,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def resolve(
    registry: CapabilityRegistry, capability_id: str, _seen: Optional[Set[str]] = None
) -> Optional[MasterCapability]:
    """Walk the redirect chain from ``capability_id`` to its current capability.

    Args:
        registry: Registry holding the capabilities.
        capability_id: Id to start from.
        _seen: Ids already visited; ids visited here are added to it. A fresh
            set is used when None.

    Returns:
        The capability at the end of the chain if it is ACTIVE. None when an id
        is unknown, when the chain revisits an id (cycle), or when the chain
        ends on a capability that is not ACTIVE.
    """
    visited = _seen if _seen is not None else set()
    current_id = capability_id
    while True:
        if current_id in visited:
            return None  # redirect cycle guard
        visited.add(current_id)

        cap = registry.capabilities.get(current_id)
        if cap is None:
            return None
        if not cap.redirect_to:
            # End of the chain: only an ACTIVE capability resolves.
            return cap if cap.state == LifecycleState.ACTIVE else None
        current_id = cap.redirect_to
|
||||||
|
|
||||||
|
|
||||||
|
def deprecate_capability(
    registry: CapabilityRegistry,
    capability_id: str,
    redirect_to: Optional[str] = None,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Deprecate a capability, optionally redirecting it to another id.

    The capability's state becomes DEPRECATED and its ``redirect_to`` is set to
    the given value. A lifecycle event is appended to the registry: REDIRECT
    when a redirect target is given, DEPRECATE otherwise.

    Args:
        registry: Registry holding the capability (mutated in place).
        capability_id: Id of the capability to deprecate.
        redirect_to: Optional id that the deprecated capability points to.
        provenance: Optional provenance for the event; when falsy, a "system"
            provenance naming the capability is created.

    Returns:
        The lifecycle event that was recorded.

    Raises:
        KeyError: ``capability_id`` is not in the registry.
        ValueError: the redirect would point back at ``capability_id``, either
            directly or through existing redirects. Nothing is modified then.
    """
    cap = registry.capabilities.get(capability_id)
    if cap is None:
        raise KeyError(capability_id)

    # Reject redirects that loop back to this capability. Such a loop can never
    # be resolved, so every id on it would silently stop resolving.
    hop, walked = redirect_to, set()
    while hop and hop not in walked:
        if hop == capability_id:
            raise ValueError(
                "redirect %s -> %s would create a redirect cycle" % (capability_id, redirect_to)
            )
        walked.add(hop)
        following = registry.capabilities.get(hop)
        hop = following.redirect_to if following is not None else None

    cap.state = LifecycleState.DEPRECATED
    cap.redirect_to = redirect_to
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.REDIRECT if redirect_to else LifecycleEventType.DEPRECATE,
        from_ids=[capability_id],
        to_ids=[redirect_to] if redirect_to else [],
        provenance=provenance or Provenance(author="system", basis="deprecate %s" % capability_id),
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
|
|
||||||
|
|
||||||
|
def merge_capabilities(
    registry: CapabilityRegistry,
    from_id: str,
    into_id: str,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Merge ``from_id`` into ``into_id``.

    ``from_id`` becomes DEPRECATED and redirects to ``into_id``; a MERGE
    lifecycle event is appended to the registry.

    Args:
        registry: Registry holding both capabilities (mutated in place).
        from_id: Id of the capability that is merged away.
        into_id: Id of the capability that survives.
        provenance: Optional provenance for the event; when falsy, a "system"
            provenance naming both ids is created.

    Returns:
        The lifecycle event that was recorded.

    Raises:
        KeyError: either id is not in the registry.
        ValueError: ``into_id`` is ``from_id`` itself or already redirects to
            it, directly or through other redirects. Nothing is modified then.
    """
    if from_id not in registry.capabilities or into_id not in registry.capabilities:
        raise KeyError("%s or %s" % (from_id, into_id))

    # Reject merges that would make the redirect chain loop back to from_id
    # (including merging a capability into itself). Such a loop can never be
    # resolved, so the identities on it would be lost.
    hop, walked = into_id, set()
    while hop and hop not in walked:
        if hop == from_id:
            raise ValueError(
                "merge %s -> %s would create a redirect cycle" % (from_id, into_id)
            )
        walked.add(hop)
        following = registry.capabilities.get(hop)
        hop = following.redirect_to if following is not None else None

    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = into_id
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.MERGE,
        from_ids=[from_id],
        to_ids=[into_id],
        provenance=provenance or Provenance(author="system", basis="merge %s -> %s" % (from_id, into_id)),
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
|
|
||||||
|
|
||||||
|
def split_capability(
    registry: CapabilityRegistry,
    from_id: str,
    into_ids: List[str],
    primary: Optional[str] = None,
    provenance: Optional[Provenance] = None,
) -> IdentityLifecycleEvent:
    """Split ``from_id`` into several capabilities.

    ``from_id`` becomes DEPRECATED. Its ``redirect_to`` is set to ``primary``;
    when no primary is given the old id has no redirect, because a split has no
    single successor. A SPLIT lifecycle event listing ``into_ids`` is appended
    to the registry.

    Args:
        registry: Registry holding the capability (mutated in place).
        from_id: Id of the capability being split.
        into_ids: Ids of the resulting capabilities (copied into the event).
        primary: Optional id the old capability should redirect to.
        provenance: Optional provenance for the event; when falsy, a "system"
            provenance naming ``from_id`` is created.

    Returns:
        The lifecycle event that was recorded.

    Raises:
        KeyError: ``from_id`` is not in the registry.
        ValueError: ``primary`` is ``from_id`` itself or already redirects to
            it, directly or through other redirects. Nothing is modified then.
    """
    if from_id not in registry.capabilities:
        raise KeyError(from_id)

    # Reject a primary whose redirect chain loops back to from_id; such a loop
    # can never be resolved.
    # NOTE(review): primary is not required to be one of into_ids — confirm
    # whether callers rely on that freedom before tightening it.
    hop, walked = primary, set()
    while hop and hop not in walked:
        if hop == from_id:
            raise ValueError(
                "split %s with primary %s would create a redirect cycle" % (from_id, primary)
            )
        walked.add(hop)
        following = registry.capabilities.get(hop)
        hop = following.redirect_to if following is not None else None

    frm = registry.capabilities[from_id]
    frm.state = LifecycleState.DEPRECATED
    frm.redirect_to = primary
    event = IdentityLifecycleEvent(
        event_id=_next_event_id(registry),
        event_type=LifecycleEventType.SPLIT,
        from_ids=[from_id],
        to_ids=list(into_ids),
        provenance=provenance or Provenance(author="system", basis="split %s" % from_id),
    )
    registry.lifecycle_events.append(event)
    return event
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
"""Derivation policy v0 for the Master Capability Registry.
|
||||||
|
|
||||||
|
Confidence + status are DERIVED from (relationship_type, evidence_kind) under a
|
||||||
|
versioned policy — never stored. HARD RULE baked in and structurally guarded: a
|
||||||
|
CERTIFICATION is a claim, never proof — no certification-backed rule may yield
|
||||||
|
CONFIRMED. CONFIRMED requires a CONFIRMS relation backed by a concrete ARTIFACT
|
||||||
|
(or an EXPERT assertion).
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
AssertionStatus,
|
||||||
|
Confidence,
|
||||||
|
EvidenceKind,
|
||||||
|
PolicyRule,
|
||||||
|
PolicyVersion,
|
||||||
|
RelationType,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _rule(rt: RelationType, ek: EvidenceKind, st: AssertionStatus, cf: Confidence) -> PolicyRule:
    """Positional shorthand for building a PolicyRule (relation, evidence, status, confidence)."""
    fields = {
        "relationship_type": rt,
        "evidence_kind": ek,
        "status": st,
        "confidence": cf,
    }
    return PolicyRule(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
# (relationship_type, evidence_kind) -> (status, confidence)
|
||||||
|
_V0_RULES = [
    _rule(relation, evidence, status, confidence)
    for relation, evidence, status, confidence in (
        # A concrete artifact or an expert confirming the capability -> CONFIRMED.
        (RelationType.CONFIRMS, EvidenceKind.ARTIFACT, AssertionStatus.CONFIRMED, Confidence.HIGH),
        (RelationType.CONFIRMS, EvidenceKind.EXPERT, AssertionStatus.CONFIRMED, Confidence.MEDIUM),
        # Equivalent capability backed by a certificate or artifact -> INFERRED, never confirmed.
        (RelationType.EQUIVALENT, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.HIGH),
        (RelationType.EQUIVALENT, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.HIGH),
        # Supports is weaker than equivalent.
        (RelationType.SUPPORTS, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.SUPPORTS, EvidenceKind.ARTIFACT, AssertionStatus.INFERRED, Confidence.MEDIUM),
        # Requires: an obligation NEEDS the capability (relevance, not possession).
        (RelationType.REQUIRES, EvidenceKind.NONE, AssertionStatus.UNKNOWN, Confidence.LOW),
        # A broader/narrower certificate gives only a weak inference.
        (RelationType.BROADER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.NARROWER_THAN, EvidenceKind.CERTIFICATION, AssertionStatus.INFERRED, Confidence.LOW),
        (RelationType.RELATED_TO, EvidenceKind.CERTIFICATION, AssertionStatus.UNKNOWN, Confidence.LOW),
    )
]


# The policy shipped by default, built from the v0 rule table above.
DEFAULT_POLICY = PolicyVersion(
    policy_version="capability-policy-v0",
    description="v0: certification never yields CONFIRMED; only CONFIRMS + ARTIFACT/EXPERT does.",
    rules=_V0_RULES,
)
|
||||||
|
|
||||||
|
|
||||||
|
def assert_no_certification_confirms(policy: PolicyVersion) -> None:
    """Check the hard rule that no CERTIFICATION-backed rule yields CONFIRMED.

    Args:
        policy: The policy whose rules are checked.

    Raises:
        ValueError: on the first rule that pairs CERTIFICATION evidence with a
            CONFIRMED status; the message names the policy version and the
            rule's relationship type.
    """
    violations = (
        rule
        for rule in policy.rules
        if rule.evidence_kind == EvidenceKind.CERTIFICATION
        and rule.status == AssertionStatus.CONFIRMED
    )
    first_violation = next(violations, None)
    if first_violation is not None:
        raise ValueError(
            "policy %s violates hard rule: certification -> confirmed (%s)"
            % (policy.policy_version, first_violation.relationship_type.value)
        )


# Fail fast at import time: the shipped default policy must satisfy the hard rule.
assert_no_certification_confirms(DEFAULT_POLICY)
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""Master Capability Registry v0 — Compliance Execution domain (Phase 2C).
|
||||||
|
|
||||||
|
Built from the Reasoning session per user directive, but this IS the Compliance
|
||||||
|
Execution model (Execution owns Capability). Third real instance of the
|
||||||
|
identity-machine pattern (after Master Controls and Master Obligations):
|
||||||
|
|
||||||
|
Candidate -> Normalization -> Dedup -> Stable Identity (MCAP) -> Typed Relations
|
||||||
|
|
||||||
|
KEY SENTENCE (stored vs derived):
|
||||||
|
STORED : identities, sources, relationship types, policy versions, lifecycle
|
||||||
|
events, provenance.
|
||||||
|
DERIVED : confidence, coverage and gap statements — computed on demand, NEVER
|
||||||
|
stored (see policy.py / engine.evaluate_relation).
|
||||||
|
|
||||||
|
These are APPLICATION/registry types, NOT compliance-meta-model classes. In
|
||||||
|
particular `CapabilityRelation` is relation METADATA inside the registry — it does
|
||||||
|
NOT introduce a new meta-model class. Whether a reified relation must enter the
|
||||||
|
frozen meta-model is a Meta-Model-Owner decision (architecture freeze v1.0),
|
||||||
|
deferred until a demonstrable failure case exists.
|
||||||
|
|
||||||
|
Self-contained (no Reasoning import — Reasoning consumes Capability, not the other
|
||||||
|
way round). Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class Confidence(str, Enum):
    # Three-level confidence scale; members are also plain strings.
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
||||||
|
|
||||||
|
|
||||||
|
class AssertionStatus(str, Enum):
    """How well-established a capability claim is. A numeric score is presentation;
    THIS type is the truth (derived from relationship type + evidence + policy)."""

    # Four trust states; members are also plain strings.
    DECLARED = "declared"
    INFERRED = "inferred"
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class RelationType(str, Enum):
    # Kinds of typed relation between a source and a target capability.
    EQUIVALENT = "equivalent"
    SUPPORTS = "supports"
    REQUIRES = "requires"
    CONFIRMS = "confirms"
    BROADER_THAN = "broader_than"
    NARROWER_THAN = "narrower_than"
    RELATED_TO = "related_to"
|
||||||
|
|
||||||
|
|
||||||
|
class EvidenceKind(str, Enum):
    # What backs a relation.
    CERTIFICATION = "certification"  # a held certificate: a CLAIM, never proof
    ARTIFACT = "artifact"  # concrete doc / config / test / log
    EXPERT = "expert"  # assertion by a human expert
    NONE = "none"
|
||||||
|
|
||||||
|
|
||||||
|
class LifecycleState(str, Enum):
    # State of a capability identity.
    ACTIVE = "active"
    DEPRECATED = "deprecated"
|
||||||
|
|
||||||
|
|
||||||
|
class LifecycleEventType(str, Enum):
    # Kinds of identity lifecycle event.
    MERGE = "merge"
    SPLIT = "split"
    DEPRECATE = "deprecate"
    REDIRECT = "redirect"
|
||||||
|
|
||||||
|
|
||||||
|
class Provenance(BaseModel):
    """Every CURATED atom carries its own provenance (who / when / on what basis)."""

    # who
    author: str = ""
    # when: an ISO timestamp supplied by the caller; never generated here
    asserted_at: Optional[str] = None
    # on what basis
    basis: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: identity ──────────────────────────────────────────────────────
|
||||||
|
class MasterCapability(BaseModel):
    # Stored identity record of one capability.

    capability_id: str  # stable id of the form MCAP-xxxxx

    # -- descriptive fields -------------------------------------------------
    name: str = ""
    definition: str = ""
    category: str = ""
    domains: List[str] = Field(default_factory=list)
    typical_evidence: List[str] = Field(default_factory=list)

    # -- identity lifecycle -------------------------------------------------
    version: int = 1
    state: LifecycleState = LifecycleState.ACTIVE
    redirect_to: Optional[str] = None  # set on merge / deprecate

    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityCandidate(BaseModel):
    # A not-yet-minted capability term.

    raw_term: str  # the term as found, e.g. "Patch Management"
    source: str = ""  # where it was found, e.g. "CRA:Annex I (2)(d)"
    normalized: str = ""  # normalized form; empty when not set
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: typed relation metadata (NOT a meta-model class) ──────────────
|
||||||
|
class CapabilityRelation(BaseModel):
    # Typed relation from an external source to a registered capability.

    relation_id: str
    # External term / obligation / certification id, e.g. "certification:ISO27001".
    source: str
    # Id of the capability the relation points at (MCAP-...).
    target_capability_id: str
    relationship_type: RelationType
    evidence_kind: EvidenceKind = EvidenceKind.NONE
    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: versioned derivation policy ───────────────────────────────────
|
||||||
|
class PolicyRule(BaseModel):
    # One derivation rule: (relationship_type, evidence_kind) -> (status, confidence).

    # -- match key ----------------------------------------------------------
    relationship_type: RelationType
    evidence_kind: EvidenceKind

    # -- derived outcome ----------------------------------------------------
    status: AssertionStatus
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyVersion(BaseModel):
    """A versioned derivation policy. `policy_version` is recorded with every
    assessment so "why did you say X last year" is answerable with the policy
    as-of-then. Without this, `derived` and `auditable/reproducible` contradict."""

    policy_version: str  # version label copied into each assessment
    description: str = ""
    # Rules in evaluation order.
    rules: List[PolicyRule] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored: identity lifecycle ────────────────────────────────────────────
|
||||||
|
class IdentityLifecycleEvent(BaseModel):
    # One recorded change to capability identities.

    event_id: str
    event_type: LifecycleEventType
    # Ids the event starts from and the ids it leads to.
    from_ids: List[str] = Field(default_factory=list)
    to_ids: List[str] = Field(default_factory=list)
    # Optional timestamp string; None when not supplied.
    at: Optional[str] = None
    provenance: Provenance = Field(default_factory=Provenance)
|
||||||
|
|
||||||
|
|
||||||
|
# ── DERIVED — never stored ────────────────────────────────────────────────
|
||||||
|
class DerivedAssessment(BaseModel):
    # Result of evaluating one relation under a policy. Computed on demand.

    target_capability_id: str
    status: AssertionStatus
    confidence: Confidence
    # Version label of the policy that produced this assessment.
    policy_version: str
    explanation: str = ""
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Company Intelligence (Phase 2A) — Company Capability Profile foundation.
|
||||||
|
|
||||||
|
The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation
|
||||||
|
-> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile
|
||||||
|
with a four-state trust model (declared/inferred/confirmed/unknown). A certification
|
||||||
|
yields at most an INFERRED candidate — never "erfuellt" (German for "fulfilled").
|
||||||
|
|
||||||
|
Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability
|
||||||
|
mapping (Execution-owned) via an injected contract — no mapping data in product code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING
|
||||||
|
from .engine import build_company_profile
|
||||||
|
from .schemas import (
|
||||||
|
CapabilityEvidence,
|
||||||
|
Certification,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
Declaration,
|
||||||
|
ExistingEvidence,
|
||||||
|
ExistingProcess,
|
||||||
|
ExistingSystem,
|
||||||
|
OperationalCapability,
|
||||||
|
OperationalCapabilityCandidate,
|
||||||
|
VerificationStatus,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public names of the package: the builder, the schema types, then the contract types.
__all__ = [
    "build_company_profile",
    "CompanyContext", "CompanyCapabilityProfile",
    "Certification", "Declaration",
    "ExistingProcess", "ExistingSystem", "ExistingEvidence",
    "CapabilityEvidence",
    "OperationalCapabilityCandidate", "OperationalCapability",
    "VerificationStatus",
    "CapabilityMappingEntry", "CertificationCapabilityMap", "EMPTY_MAPPING",
]
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
"""Consumption contract for the Certification -> Capability mapping.
|
||||||
|
|
||||||
|
OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the
|
||||||
|
Certification->Capability / Feature->Capability mapping RULES live in the Compliance
|
||||||
|
Execution domain. This Reasoning layer defines ONLY the shape it consumes and never
|
||||||
|
ships mapping DATA in product code — tests inject mocks, so the real table can only
|
||||||
|
ever live in Execution.
|
||||||
|
|
||||||
|
Execution will eventually provide CapabilityRegistry / CapabilityMapping /
|
||||||
|
CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate`
|
||||||
|
{capability_id, source, confidence, verification_status} (see schemas.py) and the
|
||||||
|
minimal mapping SHAPE below — nothing more.
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityMappingEntry(BaseModel):
    """One mapping rule SHAPE: a certification implies candidate capabilities.

    Contract type only. The actual table (which capabilities ISO27001 implies) is
    Execution's DATA and MUST NOT be hard-coded here or anywhere in product code.
    """

    # Ids of the capabilities implied by the certification.
    capability_ids: List[str] = Field(default_factory=list)
    # Confidence attached to the entry; MEDIUM unless stated otherwise.
    confidence: Confidence = Confidence.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
|
# Lookup shape: certification_id -> CapabilityMappingEntry. The table is
# injected at call time; product code holds NO entries.
CertificationCapabilityMap = Dict[str, CapabilityMappingEntry]

# Deliberately empty. Without an injected mapping there are zero inferred
# candidates, which is the architectural guarantee that the registry lives
# only in the Compliance Execution domain.
EMPTY_MAPPING: CertificationCapabilityMap = dict()
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
"""Company Intelligence engine (Phase 2A) — build the Company Capability Profile.
|
||||||
|
|
||||||
|
Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence,
|
||||||
|
candidates and (only via explicit verification) confirmed capabilities.
|
||||||
|
|
||||||
|
HARD RULE enforced here: a certification yields at most an INFERRED candidate; it
|
||||||
|
can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence
|
||||||
|
(`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without
|
||||||
|
a known mapping yield evidence-of-claim but NO inferred capability (the mapping is
|
||||||
|
Execution's data, injected — never hard-coded here).
|
||||||
|
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
from .contract import EMPTY_MAPPING, CertificationCapabilityMap
|
||||||
|
from .schemas import (
|
||||||
|
CapabilityEvidence,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
OperationalCapability,
|
||||||
|
OperationalCapabilityCandidate,
|
||||||
|
VerificationStatus,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]:
    """Turn every customer declaration into a DECLARED candidate.

    Each candidate carries MEDIUM confidence and the source
    ``declaration:<company_id>``. The result follows the order of
    ``context.declarations``.
    """
    return [
        OperationalCapabilityCandidate(
            capability_id=declaration.capability_id,
            source="declaration:%s" % context.company_id,
            confidence=Confidence.MEDIUM,
            verification_status=VerificationStatus.DECLARED,
        )
        for declaration in context.declarations
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _from_certifications(
    context: CompanyContext, mapping: CertificationCapabilityMap
) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]:
    """Derive claim evidence and INFERRED candidates from the certifications.

    Refinement 1: certification -> evidence-of-capability (claim) -> inferred
    candidate. Every certification yields exactly one ``CapabilityEvidence``.
    Candidates are added only when ``mapping`` has an entry for the
    certification id: one per mapped capability id, carrying the entry's
    confidence. Nothing produced here is ever CONFIRMED.

    Returns:
        ``(evidence, inferred)``, both in certification order.
    """
    claims: List[CapabilityEvidence] = []
    candidates: List[OperationalCapabilityCandidate] = []
    for certification in context.certifications:
        cert_id = certification.certification_id
        provenance = "certification:%s" % cert_id
        claims.append(
            CapabilityEvidence(
                source=provenance,
                claim="Company holds %s" % (certification.name or cert_id),
                certification_id=cert_id,
            )
        )
        rule = mapping.get(cert_id)
        # No known mapping -> evidence only, NO inferred capability (data is Execution's).
        if rule is not None:
            candidates.extend(
                OperationalCapabilityCandidate(
                    capability_id=implied_id,
                    source=provenance,
                    confidence=rule.confidence,
                    verification_status=VerificationStatus.INFERRED,
                )
                for implied_id in rule.capability_ids
            )
    return claims, candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]:
    """Build CONFIRMED capabilities from evidence that names what it proves.

    Evidence whose ``proves_capability_id`` is empty or None is ignored. The
    remaining evidence ids are grouped per capability (first-seen order), and
    each group becomes one HIGH-confidence capability whose ``sources`` are
    those evidence ids.
    """
    evidence_by_capability: Dict[str, List[str]] = {}
    for item in context.evidence:
        if item.proves_capability_id:
            evidence_by_capability.setdefault(item.proves_capability_id, []).append(item.evidence_id)

    confirmed: List[OperationalCapability] = []
    for capability_id, evidence_ids in evidence_by_capability.items():
        confirmed.append(
            OperationalCapability(
                capability_id=capability_id,
                verification_status=VerificationStatus.CONFIRMED,
                confidence=Confidence.HIGH,
                sources=evidence_ids,
            )
        )
    return confirmed
|
||||||
|
|
||||||
|
|
||||||
|
def build_company_profile(
    context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None
) -> CompanyCapabilityProfile:
    """Assemble the Company Capability Profile from raw context and an injected mapping.

    When ``mapping`` is None the EMPTY mapping is used, so no candidates are
    inferred: the cert->capability table can only ever come from the Compliance
    Execution domain.

    Returns:
        A profile holding the certification evidence, the candidates (declared
        first, then inferred) and the confirmed capabilities. Candidates whose
        capability is already confirmed are dropped.
    """
    effective_mapping = mapping if mapping is not None else EMPTY_MAPPING
    evidence, inferred = _from_certifications(context, effective_mapping)
    declared = _declared(context)
    confirmed = _confirmed_from_evidence(context)

    already_confirmed = {capability.capability_id for capability in confirmed}
    # A confirmed capability is no longer a mere candidate.
    remaining: List[OperationalCapabilityCandidate] = []
    for candidate in declared + inferred:
        if candidate.capability_id not in already_confirmed:
            remaining.append(candidate)

    return CompanyCapabilityProfile(
        company_id=context.company_id,
        capability_evidence=evidence,
        candidate_capabilities=remaining,
        confirmed_capabilities=confirmed,
    )
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
"""Company Intelligence (Phase 2A) — Company Capability Profile (domain objects).
|
||||||
|
|
||||||
|
This is the HEAD of the spine
|
||||||
|
|
||||||
|
Company -> (Operational) Capability -> Product -> Applicable Regulation ->
|
||||||
|
Obligation -> Procedure -> Evidence
|
||||||
|
|
||||||
|
and answers a DIFFERENT question than Regulatory Intelligence: not "which laws
|
||||||
|
apply to my product" but "which capabilities does my company already have, and
|
||||||
|
which regulatory obligations might they already cover".
|
||||||
|
|
||||||
|
HARD RULE (structural, not convention): a capability derived from a certification
|
||||||
|
is at most INFERRED — never CONFIRMED, never "erfuellt". A certification produces
|
||||||
|
EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked
|
||||||
|
evidence produces a CONFIRMED capability. This keeps the company side inside
|
||||||
|
Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a
|
||||||
|
conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution).
|
||||||
|
|
||||||
|
OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine.
|
||||||
|
It does NOT own the Certification->Capability mapping RULES — those are the same
|
||||||
|
kind of rule as Feature->Capability and belong to the Compliance Execution
|
||||||
|
Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate`
|
||||||
|
{capability_id, source, confidence, verification_status} via an injected mapping
|
||||||
|
(see contract.py). No mapping DATA lives in product code (tests inject mocks).
|
||||||
|
|
||||||
|
Application/reasoning types, NOT compliance-meta-model classes (architecture
|
||||||
|
freeze v1.0 untouched). Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
|
||||||
|
class VerificationStatus(str, Enum):
    """How well-established a company CAPABILITY is — a FOURTH vocabulary.

    It is disjoint from ClaimCoverage (Welt 1, customer claim vs obligation),
    ComplianceStatus (Welt 2, verified conformity) and DeltaType (RCI), and it
    never states whether an obligation is met.

    Progression: DECLARED (the customer says so) -> INFERRED (a certification
    implies it) -> CONFIRMED (checked against real evidence). UNKNOWN means
    there is no signal.
    """

    DECLARED = "declared"
    INFERRED = "inferred"
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
# ── raw company inputs (the CompanyContext children) ──────────────────────
|
||||||
|
class Certification(BaseModel):
    # Raw company input: one certification entry of the CompanyContext.
    # Identifier of the certification, e.g. "ISO27001".
    certification_id: str
    name: str = ""
    # What the certification covers, as stated by the customer.
    scope: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Declaration(BaseModel):
    """Customer statement claiming a capability (e.g. "we do patch management")."""

    # Capability the customer claims to have.
    capability_id: str
    # Free-text wording of the claim; optional.
    statement: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingProcess(BaseModel):
    # Raw company input: a process entry of the CompanyContext.
    process_id: str
    # Optional display name.
    name: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingSystem(BaseModel):
    # Raw company input: a system entry of the CompanyContext.
    system_id: str
    # Optional display name.
    name: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ExistingEvidence(BaseModel):
    """Concrete artefact the company already holds (policy, audit log, SBOM ...).

    ``proves_capability_id`` is the ONLY thing that may lift a capability to
    CONFIRMED, and only once a human/engine has attached real evidence.
    """

    evidence_id: str
    # Kind of artefact: config_export / test_report / policy / audit_log / ...
    evidence_type: str = ""
    # Capability this artefact proves; None when it proves none.
    proves_capability_id: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── intermediate: certification -> evidence-of-capability (refinement 1) ──
|
||||||
|
class CapabilityEvidence(BaseModel):
    """EVIDENCE for a capability — all a certification yields directly.

    "Company holds a certified ISMS" is the evidence/claim. Capabilities are
    only INFERRED from it afterwards, through the injected (Execution-owned)
    mapping, never directly.
    """

    # Provenance, e.g. "certification:ISO27001".
    source: str
    # Human-readable claim text.
    claim: str = ""
    # Certification the evidence stems from; empty when not set.
    certification_id: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
# ── consumed contract type (refinement 2) ─────────────────────────────────
|
||||||
|
class OperationalCapabilityCandidate(BaseModel):
    """The ONLY type Reasoning consumes from Execution's capability mapping.

    "Operational" means organisational ability, which keeps it distinct from
    later Product/AI/Safety capabilities. A candidate always belongs to Welt 1:
    it is DECLARED or INFERRED and never CONFIRMED on its own.
    """

    capability_id: str
    # Provenance string naming where the candidate came from.
    source: str
    confidence: Confidence = Confidence.MEDIUM
    # Defaults to INFERRED; declared candidates set DECLARED explicitly.
    verification_status: VerificationStatus = VerificationStatus.INFERRED
|
||||||
|
|
||||||
|
|
||||||
|
class OperationalCapability(BaseModel):
    """Capability the company actually has, CONFIRMED against real evidence."""

    capability_id: str
    # Required: there is deliberately no default trust state.
    verification_status: VerificationStatus
    confidence: Confidence = Confidence.MEDIUM
    # Identifiers of what backs this capability.
    sources: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── the container Reasoning OWNS (raw inputs) ─────────────────────────────
|
||||||
|
class CompanyContext(BaseModel):
    # Container of raw company inputs, owned by Reasoning. Every list
    # defaults to empty, so only company_id is required.
    company_id: str
    certifications: List[Certification] = Field(default_factory=list)
    declarations: List[Declaration] = Field(default_factory=list)
    processes: List[ExistingProcess] = Field(default_factory=list)
    systems: List[ExistingSystem] = Field(default_factory=list)
    evidence: List[ExistingEvidence] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ── derived view (the Company Capability Profile) ─────────────────────────
|
||||||
|
class CompanyCapabilityProfile(BaseModel):
    """Derived view: capability evidence, candidates (declared/inferred) and confirmed.

    Entries in ``candidate_capabilities`` NEVER auto-promote into
    ``confirmed_capabilities``; only explicit ExistingEvidence does that. The
    hard rule is enforced in engine.py.
    """

    company_id: str
    # Claims derived from certifications.
    capability_evidence: List[CapabilityEvidence] = Field(default_factory=list)
    # Declared and inferred capabilities that are not confirmed.
    candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list)
    # Capabilities backed by explicit evidence.
    confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list)
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
"""Regulatory Completeness — auditable knowledge coverage, not confidence.
|
||||||
|
|
||||||
|
An internal quality machine: for an assessment it reports identified vs assessed regulations and
|
||||||
|
justifies every open or excluded domain (corpus gap -> future_corpus; applicability uncertain ->
|
||||||
|
query_required). The metric is counts, never a single percentage. The product never claims full
|
||||||
|
coverage — it makes its own knowledge state transparent and auditable. Deterministic, no LLM, no
|
||||||
|
new corpus/meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import assess_completeness
|
||||||
|
from .schemas import (
|
||||||
|
Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"assess_completeness",
|
||||||
|
"CompletenessReport",
|
||||||
|
"CorpusStatus",
|
||||||
|
"DomainCoverage",
|
||||||
|
"Exclusion",
|
||||||
|
"Assumption",
|
||||||
|
]
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""Regulatory Completeness Engine — measure auditable knowledge coverage for an assessment.
|
||||||
|
|
||||||
|
Separates what we IDENTIFIED (triggered regulations) from what we ASSESSED (validated corpus AND
|
||||||
|
determined applicability), and justifies every gap. Two kinds of „open":
|
||||||
|
- corpus gap — no validated corpus yet (e.g. Environmental) -> future_corpus
|
||||||
|
- applicability open — corpus exists but applicability is uncertain (Data Act) -> query_required
|
||||||
|
The metric is COUNTS, never a single percentage. The audit statement says plainly „wir bewerteten M
|
||||||
|
von N Domänen; K sind nicht im validierten Korpus und wurden bewusst nicht bewertet".
|
||||||
|
|
||||||
|
Deterministic, computed-not-stored, no LLM, no new corpus/meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
Assumption, CompletenessReport, CorpusStatus, DomainCoverage, Exclusion,
|
||||||
|
)
|
||||||
|
|
||||||
|
# String values that name a CorpusStatus member.
_VALID = {member.value for member in CorpusStatus}


def _status(corpus_status: Dict[str, str], reg: str) -> CorpusStatus:
    """Return the corpus status registered for ``reg``.

    A regulation that is missing from ``corpus_status``, or whose registered
    value is not a CorpusStatus value, is reported as UNKNOWN.
    """
    registered = corpus_status.get(reg, "unknown")
    if registered not in _VALID:
        return CorpusStatus.UNKNOWN
    return CorpusStatus(registered)
|
||||||
|
|
||||||
|
|
||||||
|
def _text_or(value: Any, default: str = "") -> str:
    """Return ``str(value)``, or ``default`` when ``value`` is None.

    Keeps an explicit ``None`` in an injected dict from surfacing as the
    literal text "None" in the audit report.
    """
    return default if value is None else str(value)


def assess_completeness(
    identified_regulations: List[str],
    corpus_status: Dict[str, str],
    uncertain: Optional[List[Dict[str, Any]]] = None,
    assumptions: Optional[List[Dict[str, Any]]] = None,
    assessed_obligations: int = 0,
) -> CompletenessReport:
    """Build the auditable coverage report.

    Args:
        identified_regulations: regulations triggered/identified for this
            product; duplicates are collapsed and the result is sorted.
        corpus_status: regulation -> one of validated/draft/unsupported/unknown
            (curated/injected corpus registry). Missing or unrecognised values
            count as unknown.
        uncertain: applicability-uncertain regulations, each
            ``{regulation (or subject), deciding_question, reason}``. Entries
            without a subject still count as uncertainties but produce no
            exclusion.
        assumptions: ``[{key, value, note}]``.
        assessed_obligations: count from Execution (injected, default 0).

    Returns:
        A CompletenessReport. A regulation counts as assessed only when its
        corpus is validated AND its applicability is not uncertain; every other
        identified regulation is open and is justified by an exclusion
        (``query_required``, ``in_review`` or ``future_corpus``).
    """
    ids = sorted(set(identified_regulations))
    unc = uncertain or []
    unc_subjects = {
        str(u.get("regulation") or u.get("subject"))
        for u in unc
        if (u.get("regulation") or u.get("subject"))
    }

    # Resolve each regulation's corpus status once instead of once per use.
    status_of = {r: _status(corpus_status, r) for r in ids}

    coverage = [DomainCoverage(regulation=r, status=status_of[r]) for r in ids]
    assessed = [r for r in ids if status_of[r] == CorpusStatus.VALIDATED and r not in unc_subjects]
    assessed_lookup = set(assessed)
    open_regs = [r for r in ids if r not in assessed_lookup]
    open_corpora = [r for r in ids if status_of[r] in (CorpusStatus.UNSUPPORTED, CorpusStatus.UNKNOWN)]

    exclusions: List[Exclusion] = []
    # First the applicability-uncertain subjects: they need a customer answer.
    for u in unc:
        subj = str(u.get("regulation") or u.get("subject") or "")
        if not subj:
            continue
        exclusions.append(Exclusion(
            subject=subj,
            reason=_text_or(u.get("reason"), "Anwendbarkeit unsicher"),
            deciding_question=_text_or(u.get("deciding_question")),
            resolution="query_required"))
    # Then every remaining open regulation: its corpus is not validated.
    for r in open_regs:
        if r in unc_subjects:
            continue
        if status_of[r] == CorpusStatus.DRAFT:
            exclusions.append(Exclusion(subject=r, reason="Korpus in Bearbeitung (draft)", resolution="in_review"))
        else:
            exclusions.append(Exclusion(subject=r, reason="nicht im validierten Korpus", resolution="future_corpus"))

    covered_subjects = {e.subject for e in exclusions}
    justification = (not open_regs) or set(open_regs) <= covered_subjects
    assumptions_m = [
        Assumption(
            key=_text_or(a.get("key")),
            value=_text_or(a.get("value")),
            note=_text_or(a.get("note")),
        )
        for a in (assumptions or [])
    ]

    summary = "Identifiziert %d · bewertet %d · offen %d · Unsicherheiten %d · Begründung %s" % (
        len(ids), len(assessed), len(open_regs), len(unc), "ja" if justification else "nein")
    if open_regs:
        audit = (
            "Für dieses Produkt konnten wir %d von %d identifizierten regulatorischen Domänen vollständig "
            "bewerten. %d weitere %s noch nicht Bestandteil des validierten Korpus bzw. anwendungsunsicher "
            "und wurden deshalb bewusst nicht bewertet." % (
                len(assessed), len(ids), len(open_regs), "ist" if len(open_regs) == 1 else "sind"))
    else:
        audit = "Für dieses Produkt konnten wir alle %d identifizierten regulatorischen Domänen vollständig bewerten." % len(ids)

    return CompletenessReport(
        identified_regulations=ids, assessed_regulations=assessed, open_regulations=open_regs,
        open_corpora=open_corpora, coverage=coverage, assumptions=assumptions_m, exclusions=exclusions,
        uncertainties_count=len(unc), assessed_obligations=assessed_obligations,
        justification_present=justification, completeness_summary=summary, audit_statement=audit,
    )
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""Schemas for the Regulatory Completeness Engine — auditable knowledge-coverage, not confidence.
|
||||||
|
|
||||||
|
For an assessment it answers „wie sicher sind wir, dass diese Antwort VOLLSTÄNDIG ist?" by separating
|
||||||
|
IDENTIFIED regulations from ASSESSED ones (those in the validated corpus) and listing every open or
|
||||||
|
excluded domain WITH a reason. The metric is counts, never a single „87%". This is an internal quality
|
||||||
|
machine: the product never claims full coverage — it makes its own knowledge state transparent.
|
||||||
|
Deterministic, computed-not-stored, no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CorpusStatus(str, Enum):
    """Maturity of our knowledge corpus for one regulation/domain."""

    # We can fully assess this.
    VALIDATED = "validated"
    # Partial / under review.
    DRAFT = "draft"
    # Triggered, but no corpus exists yet.
    UNSUPPORTED = "unsupported"
    # Not in our registry at all.
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class DomainCoverage(BaseModel):
    # Corpus status of one identified regulation.
    regulation: str
    # UNKNOWN unless a status is supplied.
    status: CorpusStatus = CorpusStatus.UNKNOWN
    # Optional free-text remark.
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Exclusion(BaseModel):
    """A domain/regulation DELIBERATELY not assessed — always with a reason (the heart of the engine)."""

    # The regulation or domain that was left out.
    subject: str
    # Why it was not assessed; required, never defaulted.
    reason: str
    # What would resolve it (only meaningful for a query).
    deciding_question: str = ""
    # One of: query_required | in_review | future_corpus | not_applicable.
    # The completeness engine emits "in_review" for draft corpora.
    resolution: str = "future_corpus"
|
||||||
|
|
||||||
|
|
||||||
|
class Assumption(BaseModel):
    # One assumption recorded for an assessment, as key / value / note text.
    key: str
    value: str = ""
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class CompletenessReport(BaseModel):
    """The auditable coverage report for one assessment — counts + justification, NO single percentage."""

    # Everything triggered/identified for the product.
    identified_regulations: List[str] = Field(default_factory=list)
    # Validated corpus AND applicability not uncertain.
    assessed_regulations: List[str] = Field(default_factory=list)
    # Identified but not assessed (corpus not validated, or applicability uncertain).
    open_regulations: List[str] = Field(default_factory=list)
    # Corpus unsupported/unknown: missing domains worth building.
    open_corpora: List[str] = Field(default_factory=list)
    coverage: List[DomainCoverage] = Field(default_factory=list)
    assumptions: List[Assumption] = Field(default_factory=list)
    exclusions: List[Exclusion] = Field(default_factory=list)
    # Number of uncertainty entries supplied to the engine.
    uncertainties_count: int = 0
    # Injected (Execution-owned).
    assessed_obligations: int = 0
    # True when every open regulation has an exclusion explaining it.
    justification_present: bool = False
    # "Identifiziert N · bewertet M · offen K · ..."
    completeness_summary: str = ""
    # The honest narrative sentence.
    audit_statement: str = ""
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
"""Interpretation-in-Map — evaluate a customer interpretation within the map.
|
||||||
|
|
||||||
|
Thin adapter over the existing `assess_interpretation`: it judges the customer's
|
||||||
|
reading against the regulations/obligations actually present in the product's
|
||||||
|
RegulatoryMap, and flags touched unsupported domains as future_corpus_needed
|
||||||
|
instead of pseudo-evaluating them. No new legal reasoning, no RCI, no UI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .adapter import interpret_in_map
|
||||||
|
from .schemas import InterpretationInMapRequest, InterpretationInMapResult
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"interpret_in_map",
|
||||||
|
"InterpretationInMapRequest",
|
||||||
|
"InterpretationInMapResult",
|
||||||
|
]
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
"""Interpretation-in-Map adapter (step 5).
|
||||||
|
|
||||||
|
Evaluates a customer interpretation WITHIN the already-built RegulatoryMap. It
|
||||||
|
reuses the existing `assess_interpretation` (no new legal engine), restricts the
|
||||||
|
affected regulations/obligations to those present in the map, and reports any
|
||||||
|
touched unsupported domain (wastewater/chemicals/...) as future_corpus_needed
|
||||||
|
rather than pseudo-evaluating it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import InterpretationVerdict
|
||||||
|
from compliance.reasoning.interpretation_engine import assess_interpretation
|
||||||
|
from compliance.regulatory_map.schemas import RegulatoryMap
|
||||||
|
|
||||||
|
from .schemas import InterpretationInMapResult
|
||||||
|
|
||||||
|
# German wording for each verdict, inserted into the customer-facing explanation.
_LABEL: Dict[InterpretationVerdict, str] = {
    InterpretationVerdict.PLAUSIBLE: "plausibel",
    InterpretationVerdict.TOO_NARROW: "zu eng",
    InterpretationVerdict.TOO_BROAD: "zu weit",
    InterpretationVerdict.PARTIALLY_CORRECT: "teilweise korrekt",
    InterpretationVerdict.UNSUPPORTED: "nicht belegt",
    InterpretationVerdict.UNCERTAIN: "unsicher",
}
|
||||||
|
|
||||||
|
# domain -> keywords that signal the interpretation is ABOUT that (uncovered) domain.
|
||||||
|
_ENV_KEYWORDS: Dict[str, List[str]] = {
|
||||||
|
"environment_water": ["abwasser", "wastewater", "gewässer", "gewaesser", "einleitung", "abfluss"],
|
||||||
|
"chemicals": ["chemikalie", "reach", "clp", "reinigungsmittel", "biozid", "gefahrstoff", "detergenz", "lösemittel", "loesemittel"],
|
||||||
|
"environment_air": ["luft", "emission", "voc", "immission", "abluft", "verbrennung"],
|
||||||
|
"waste": ["abfall", "entsorgung", "weee", "recycling"],
|
||||||
|
"energy_resources": ["energie", "ökodesign", "oekodesign", "verbrauch"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _touches(text: str, domain: str) -> bool:
|
||||||
|
low = text.lower()
|
||||||
|
return any(kw in low for kw in _ENV_KEYWORDS.get(domain, []))
|
||||||
|
|
||||||
|
|
||||||
|
def _explain(label: str, detail: str, affected_regs: List[str], future_domains: List[str], in_scope: bool) -> str:
|
||||||
|
base = "Ihre Interpretation ist wahrscheinlich %s." % label
|
||||||
|
if detail:
|
||||||
|
base += " " + detail
|
||||||
|
if affected_regs:
|
||||||
|
base += " Betroffen in Ihrer Map: %s." % ", ".join(affected_regs)
|
||||||
|
if future_domains:
|
||||||
|
base += (
|
||||||
|
" Für %s liegt noch kein Regelkorpus vor — diese Aspekte werden nicht bewertet (future_corpus_needed)."
|
||||||
|
% ", ".join(future_domains)
|
||||||
|
)
|
||||||
|
if not in_scope and not future_domains:
|
||||||
|
base += " Diese Auslegung betrifft kein Regelwerk Ihrer aktuellen Produkt-Map."
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
def interpret_in_map(reg_map: RegulatoryMap, interpretation: str) -> InterpretationInMapResult:
    """Evaluate a customer interpretation against an already-built RegulatoryMap.

    The verdict itself comes from the existing ``assess_interpretation``; this
    adapter adds no reasoning of its own. It only:

    * keeps the affected regulations that appear anywhere in the map
      (applicable, uncertain or excluded) and the affected obligations that
      belong to an applicable regulation,
    * lists the affected regulations that the map marks as uncertain,
    * reports unsupported domains whose keywords occur in the text as
      future-corpus domains instead of evaluating them.
    """
    verdict = assess_interpretation(interpretation)  # existing engine — no new reasoning

    uncertain_ids = {entry.regulation_id for entry in reg_map.uncertain_regulations}
    known_regulations = set(uncertain_ids)
    known_regulations.update(entry.regulation_id for entry in reg_map.applicable_regulations)
    known_regulations.update(entry.regulation_id for entry in reg_map.excluded_regulations)
    known_obligations = {
        obligation.obligation_id
        for entry in reg_map.applicable_regulations
        for obligation in entry.obligations
    }

    regs_in_map = [reg for reg in verdict.affected_regulations if reg in known_regulations]
    obligations_in_map = [ob for ob in verdict.affected_obligations if ob in known_obligations]
    touched_uncertain = [reg for reg in verdict.affected_regulations if reg in uncertain_ids]
    touched_unsupported = [dom for dom in reg_map.unsupported_domains if _touches(interpretation, dom.domain)]
    touches_map = bool(regs_in_map or obligations_in_map)

    explanation = _explain(
        _LABEL[verdict.assessment],
        verdict.explanation,
        regs_in_map,
        [dom.domain for dom in touched_unsupported],
        touches_map,
    )
    return InterpretationInMapResult(
        raw_interpretation=interpretation,
        assessment=verdict.assessment,
        in_scope_of_map=touches_map,
        affected_regulations=regs_in_map,
        affected_obligations=obligations_in_map,
        related_uncertainties=touched_uncertain,
        future_corpus_domains=touched_unsupported,
        corrected_interpretation=verdict.corrected_interpretation,
        risks=verdict.risks,
        legal_basis_refs=verdict.legal_basis_refs,
        explanation=explanation,
        confidence=verdict.confidence,
    )
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
"""Schemas for Interpretation-in-Map (step 5).
|
||||||
|
|
||||||
|
A thin adapter that evaluates a customer interpretation WITHIN the already-built
|
||||||
|
RegulatoryMap — it does not assess abstract legal questions. Application types
|
||||||
|
only; no compliance-meta-model classes (freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.product_scope.schemas import UnsupportedDomain
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import Confidence, InterpretationVerdict
|
||||||
|
|
||||||
|
|
||||||
|
class InterpretationInMapRequest(BaseModel):
    # Request payload: a product profile plus the customer's reading to judge.
    product_profile: CanonicalProductRegulatoryProfile
    customer_interpretation: str
|
||||||
|
|
||||||
|
|
||||||
|
class InterpretationInMapResult(BaseModel):
    # The customer's text, unchanged.
    raw_interpretation: str
    assessment: InterpretationVerdict
    # True if it touches a regulation/obligation present in the map.
    in_scope_of_map: bool
    # Intersected with the map.
    affected_regulations: List[str] = Field(default_factory=list)
    # Intersected (registry-linked).
    affected_obligations: List[str] = Field(default_factory=list)
    # Map-uncertain regulations it touches.
    related_uncertainties: List[str] = Field(default_factory=list)
    # Touched domains without a corpus: NOT evaluated.
    future_corpus_domains: List[UnsupportedDomain] = Field(default_factory=list)
    corrected_interpretation: str = ""
    risks: List[str] = Field(default_factory=list)
    legal_basis_refs: List[str] = Field(default_factory=list)
    explanation: str = ""
    confidence: Confidence = Confidence.MEDIUM
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
"""Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
The third independent function of the pipeline (after Company 2A `Evidence -> Capability` and RS-005
|
||||||
|
`Capability -> Delta`): given ONLY the Capability Delta, rank the known journeys that best EXPLAIN it.
|
||||||
|
A Journey is an EXPLANATION of the delta, not its cause — order is `Goal -> Required -> Delta -> Journey`.
|
||||||
|
|
||||||
|
Deliberately dumb + deterministic (pure set overlap; no ML/embeddings/LLM), fully auditable, signatures
|
||||||
|
INJECTED (certificate-agnostic capability clusters). No new corpus, no graph (freeze v1.0). The Matcher
|
||||||
|
is sanctioned as the last architectural building block; everything after is knowledge work.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import match_journeys
|
||||||
|
from .schemas import (
|
||||||
|
JourneyMatch,
|
||||||
|
JourneyMatchReason,
|
||||||
|
JourneyMatchResult,
|
||||||
|
JourneySignature,
|
||||||
|
MatchContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"match_journeys",
|
||||||
|
"JourneySignature",
|
||||||
|
"MatchContext",
|
||||||
|
"JourneyMatch",
|
||||||
|
"JourneyMatchReason",
|
||||||
|
"JourneyMatchResult",
|
||||||
|
]
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
"""Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Three INDEPENDENT functions now compose the pipeline, each a different problem, all interchangeable:
|
||||||
|
1. Evidence -> Capability (Company 2A)
|
||||||
|
2. Capability -> Delta (RS-005, transition_reasoning)
|
||||||
|
3. Delta -> Journey (THIS module)
|
||||||
|
|
||||||
|
The paradigm shift: a Journey is no longer the CAUSE (Goal -> Journey -> Delta) but the EXPLANATION
|
||||||
|
(Goal -> Required -> Delta -> Journey). The matcher does NOT look at certifications, regulations,
|
||||||
|
tenders, OEM specs or the goal — it looks ONLY at the Capability Delta and asks: which known journeys
|
||||||
|
describe exactly this delta? Output is a ranked, auditable explanation ("Journey A explains 82% of the
|
||||||
|
delta, because 8 of 10 missing capabilities are identical, same target type, ...").
|
||||||
|
|
||||||
|
Deliberately DUMB and deterministic: pure set overlap, NO ML, NO embeddings, NO LLM. A learning ranker
|
||||||
|
can be layered ON TOP later; this core stays auditable. Journey signatures are INJECTED (certificate-
|
||||||
|
agnostic capability clusters), never loaded here — the engine stays hermetic. No new corpus, no
|
||||||
|
graph/meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
Honesty: `score` is the share of the DELTA a journey explains (recall over the customer's missing
|
||||||
|
capabilities), never a "fit" or a compliance verdict. `journey_only` documents where a journey reaches
|
||||||
|
BEYOND this delta, so a broad journey that explains everything is not silently preferred.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional, Sequence
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
JourneyMatch,
|
||||||
|
JourneyMatchReason,
|
||||||
|
JourneyMatchResult,
|
||||||
|
JourneySignature,
|
||||||
|
MatchContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _context_signals(journey: JourneySignature, context: Optional[MatchContext]) -> List[str]:
|
||||||
|
"""Corroborating reasons only — these are documented, they never change the score."""
|
||||||
|
if context is None:
|
||||||
|
return []
|
||||||
|
signals: List[str] = []
|
||||||
|
if context.target_type and journey.target_type and context.target_type == journey.target_type:
|
||||||
|
signals.append("gleiche Zielart")
|
||||||
|
if context.industry and journey.industry and context.industry == journey.industry:
|
||||||
|
signals.append("gleiche Branche")
|
||||||
|
if context.product_type and journey.product_type and context.product_type == journey.product_type:
|
||||||
|
signals.append("gleicher Produkttyp")
|
||||||
|
return signals
|
||||||
|
|
||||||
|
|
||||||
|
def match_journeys(
    delta: Sequence[str],
    journeys: Sequence[JourneySignature],
    context: Optional[MatchContext] = None,
) -> JourneyMatchResult:
    """Rank known journeys by the share of the Capability Delta they explain.

    The score of a journey is ``|delta INTERSECT pattern| / |delta|`` over the
    de-duplicated delta. Context never changes the score; it only contributes the
    documented ``context_signals`` and acts as a tie-breaker.

    Ranking is deterministic: exact overlap count descending, then number of context
    signals descending, then ``journey_id`` ascending. The exact overlap count is used
    instead of the rounded display score so that, for deltas with more than 100
    entries, journeys with different overlaps never tie through rounding, and a
    journey that explains a small but non-zero share is still reported as explaining
    the delta.

    Args:
        delta: The missing capabilities; duplicates are collapsed.
        journeys: Injected journey signatures to compare against.
        context: Optional corroborating context (reasons only).

    Returns:
        A ``JourneyMatchResult`` with every journey ranked, ``best`` set to the top
        journey if at least one journey shares a capability with the delta (else
        ``None``), and a human-readable headline.
    """
    delta_set = set(delta)
    n = len(delta_set)
    matches: List[JourneyMatch] = []
    for j in journeys:
        pattern = set(j.capability_pattern)
        matched = sorted(delta_set & pattern)
        # An empty delta cannot be explained by anything: score stays 0.0.
        score = (len(matched) / n) if n else 0.0
        reason = JourneyMatchReason(
            matched_capabilities=matched,
            unexplained_delta=sorted(delta_set - pattern),
            journey_only=sorted(pattern - delta_set),
            context_signals=_context_signals(j, context),
        )
        matches.append(
            JourneyMatch(
                journey_id=j.journey_id,
                label=j.label,
                score=round(score, 2),  # rounded for display only
                explains="%d von %d fehlenden Capabilities" % (len(matched), n),
                reason=reason,
            )
        )

    # Sort on the exact overlap (same order as the unrounded score, since n is shared).
    matches.sort(
        key=lambda m: (
            -len(m.reason.matched_capabilities),
            -len(m.reason.context_signals),
            m.journey_id,
        )
    )
    explaining = sum(1 for m in matches if m.reason.matched_capabilities)
    best = matches[0] if explaining else None
    if best is not None:
        headline = "%d Journeys erklaeren das Delta; beste: %s (%d%% des Deltas)" % (
            explaining,
            best.label,
            round(best.score * 100),
        )
    else:
        headline = "Keine bekannte Journey erklaert dieses Delta (neue Journey-Kandidatin)"
    return JourneyMatchResult(delta_size=n, matches=matches, best=best, headline=headline)
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
"""Schemas for the Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Derived views (computed-not-stored): nothing here is persisted; every match is recomputed from the
|
||||||
|
input delta + injected journey signatures each call. No new corpus, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class JourneySignature(BaseModel):
    """Capability-only description of a known journey (input cluster -> output cluster).

    Matching reads `capability_pattern` alone. `label` and the context fields
    (`industry`, `product_type`, `target_type`) exist for the human-auditable
    explanation and never for the score. IDs such as "ISO27001->CRA" are just one
    way to name the clusters; the matcher does not interpret them.
    """

    journey_id: str
    label: str
    # OUTPUT cluster: the delta this journey is about.
    capability_pattern: List[str] = Field(default_factory=list)
    # INPUT cluster: capabilities that are typically already present.
    assumed_capabilities: List[str] = Field(default_factory=list)
    industry: Optional[str] = None
    product_type: Optional[str] = None
    # Context only: regulation / certification / contract / environmental.
    target_type: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class MatchContext(BaseModel):
    """Optional corroborating context of a match request.

    Each field that equals the corresponding journey field is surfaced as a
    documented reason; none of them is part of the score.
    """

    industry: Optional[str] = None
    product_type: Optional[str] = None
    target_type: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatchReason(BaseModel):
    """The auditable "why" behind one match: the set arithmetic a reviewer can check."""

    # delta INTERSECT pattern: what the journey explains.
    matched_capabilities: List[str] = Field(default_factory=list)
    # delta - pattern: what the journey does NOT explain.
    unexplained_delta: List[str] = Field(default_factory=list)
    # pattern - delta: what the journey covers but this delta does not need.
    journey_only: List[str] = Field(default_factory=list)
    # Corroborating labels such as "gleiche Zielart" or "gleiche Branche".
    context_signals: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatch(BaseModel):
    """One known journey together with how much of the delta it explains (not how well it "fits")."""

    journey_id: str
    label: str
    # |delta INTERSECT pattern| / |delta|, in 0..1: share of the delta explained.
    score: float = 0.0
    # Human-readable count, e.g. "8 von 10 fehlenden Capabilities".
    explains: str = ""
    reason: JourneyMatchReason
|
||||||
|
|
||||||
|
|
||||||
|
class JourneyMatchResult(BaseModel):
    """Ranked journeys that explain a Capability Delta (a journey is an explanation, not a cause)."""

    # Size of the delta the scores refer to.
    delta_size: int = 0
    # All compared journeys, ranked descending by score.
    matches: List[JourneyMatch] = Field(default_factory=list)
    # Top-ranked journey, or None when nothing explains the delta.
    best: Optional[JourneyMatch] = None
    headline: str = ""
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
"""Knowledge Intake — classify an incoming document and assess its impact on existing knowledge.
|
||||||
|
|
||||||
|
The stage BEFORE the parser: no content extraction, only classification (Einordnung). Intersects a document's signals
|
||||||
|
(regulations + keywords) with an index of the existing knowledge to emit a `KnowledgePackage` — which
|
||||||
|
capabilities / playbooks / patterns / reference scenarios / obligations it probably touches, whether
|
||||||
|
it is a new domain, and how much review it warrants. Deterministic, no LLM, no new corpus (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import assess_document_impact, build_knowledge_index
|
||||||
|
from .schemas import (
|
||||||
|
DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"build_knowledge_index",
|
||||||
|
"assess_document_impact",
|
||||||
|
"DocumentDescriptor",
|
||||||
|
"KnowledgeIndex",
|
||||||
|
"KnowledgePackage",
|
||||||
|
"ImpactLevel",
|
||||||
|
]
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
"""Knowledge Intake — classify a document and assess its impact on existing knowledge.
|
||||||
|
|
||||||
|
The real Knowledge Production is not writing — it is TARGETED UPDATING: when 20 documents arrive,
|
||||||
|
which 5 actually change our knowledge and which 15 are ignorable? Intake answers this deterministically
|
||||||
|
by intersecting a document's signals (declared regulations + keywords) with an index of the existing
|
||||||
|
knowledge (capabilities, playbooks, transition patterns, reference scenarios, injected obligations).
|
||||||
|
It performs NO content extraction (that is the later parser stage) and uses NO LLM.
|
||||||
|
|
||||||
|
Pipeline: Knowledge Intake -> Knowledge Package -> Parser -> Draft Generator -> Review -> Published.
|
||||||
|
Pure, deterministic, computed-not-stored. No new corpus/meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional, Set
|
||||||
|
|
||||||
|
from .schemas import DocumentDescriptor, ImpactLevel, KnowledgeIndex, KnowledgePackage
|
||||||
|
|
||||||
|
|
||||||
|
def _targets(goal_to: Any) -> List[str]:
|
||||||
|
"""Extract target regulations from a transition_goal.to (single dict OR list of targets)."""
|
||||||
|
out: List[str] = []
|
||||||
|
items = goal_to if isinstance(goal_to, list) else [goal_to]
|
||||||
|
for it in items:
|
||||||
|
if isinstance(it, dict):
|
||||||
|
reg = it.get("regulation") or it.get("target") or it.get("framework")
|
||||||
|
if reg:
|
||||||
|
out.append(str(reg))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def build_knowledge_index(
    patterns: List[Dict[str, Any]],
    playbooks: List[Dict[str, Any]],
    reference_scenarios: List[Dict[str, Any]],
    obligation_index: Optional[Dict[str, List[str]]] = None,
) -> KnowledgeIndex:
    """Assemble the matching index from already-loaded knowledge dicts.

    No file I/O happens here; the caller loads the dicts. Keys that are present but
    null (``transition_goal``, ``likely_covered``, ``delta_requirements``,
    ``covers_targets``) are treated like missing keys instead of raising.

    Args:
        patterns: Transition patterns; each may carry ``id``, ``transition_goal.to``,
            ``likely_covered`` and ``delta_requirements`` (items with ``capability``
            and optional ``covers_targets``).
        playbooks: Playbook dicts; only ``capability_id`` is read.
        reference_scenarios: Scenario dicts; ``id`` and ``transition_goal.to`` are read.
        obligation_index: Optional injected mapping regulation -> obligation ids.

    Returns:
        A ``KnowledgeIndex`` whose ``regulations`` is the sorted union of all pattern
        targets, scenario targets, capability regulations and obligation-index keys.
    """
    tp: Dict[str, List[str]] = {}
    cap_regs: Dict[str, List[str]] = {}
    for p in patterns:
        pid = str(p.get("id", ""))
        # `or {}`: an explicit null transition_goal must behave like an absent one.
        targets = _targets((p.get("transition_goal") or {}).get("to"))
        if pid:
            tp[pid] = targets
        items = list(p.get("likely_covered") or []) + list(p.get("delta_requirements") or [])
        for item in items:
            cap = item.get("capability")
            if not cap:
                continue
            # A requirement without its own covers_targets inherits the pattern targets.
            regs = [str(t) for t in (item.get("covers_targets") or [])] or targets
            key = str(cap)
            cap_regs[key] = sorted(set(cap_regs.get(key, [])) | set(regs))

    rts = {
        str(r.get("id", "")): _targets((r.get("transition_goal") or {}).get("to"))
        for r in reference_scenarios
    }
    rts.pop("", None)  # scenarios without an id are not indexable

    obl = obligation_index or {}
    regulations = sorted(
        {t for ts in tp.values() for t in ts}
        | {t for ts in rts.values() for t in ts}
        | {t for ts in cap_regs.values() for t in ts}
        | set(obl.keys())
    )
    return KnowledgeIndex(
        regulations=regulations,
        capability_regulations=cap_regs,
        playbook_capabilities=sorted({str(pb.get("capability_id", "")) for pb in playbooks} - {""}),
        transition_patterns=tp,
        reference_scenarios=rts,
        obligation_index=dict(obl),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _kw_match(keywords: Set[str], capability: str) -> bool:
|
||||||
|
tokens = set(capability.lower().split("_"))
|
||||||
|
return bool(keywords & tokens) or capability.lower() in keywords
|
||||||
|
|
||||||
|
|
||||||
|
def assess_document_impact(descriptor: DocumentDescriptor, index: KnowledgeIndex) -> KnowledgePackage:
    """Classify a document and compute which existing knowledge it probably touches.

    Capabilities are affected when their regulations intersect the document's
    regulations or a document keyword matches the capability id. Playbooks are the
    affected capabilities that have a playbook. Patterns, scenarios and obligations
    are matched on the document's regulations only.

    Impact level: NEW_DOMAIN when the document names regulations and none is known;
    NONE when nothing is touched; HIGH for at least 3 capabilities, any playbook, or
    at least 5 obligations; LOW otherwise.

    A new-domain document can still hit existing capabilities or playbooks through
    keywords. The recommendation only says "kein bestehendes Wissen betroffen" when
    nothing was actually hit, so it never contradicts the `affected_*` lists.

    Args:
        descriptor: Lightweight signals of the incoming document.
        index: Index of the existing knowledge.

    Returns:
        The ``KnowledgePackage`` (impact analysis) for the document.
    """
    doc_regs = set(descriptor.regulations)
    known = set(index.regulations)
    unknown = sorted(doc_regs - known)
    new_domain = bool(doc_regs) and not (doc_regs & known)
    kw = {k.lower() for k in descriptor.keywords}

    caps = sorted(
        c for c, regs in index.capability_regulations.items()
        if (set(regs) & doc_regs) or _kw_match(kw, c)
    )
    playbooks = sorted(set(caps) & set(index.playbook_capabilities))
    patterns = sorted(pid for pid, regs in index.transition_patterns.items() if set(regs) & doc_regs)
    scenarios = sorted(rid for rid, regs in index.reference_scenarios.items() if set(regs) & doc_regs)
    obligations = sorted({o for r in doc_regs for o in index.obligation_index.get(r, [])})

    total = len(caps) + len(playbooks) + len(patterns) + len(scenarios) + len(obligations)
    if new_domain:
        level = ImpactLevel.NEW_DOMAIN
        if total == 0:
            rec = "Neue Domäne — Corpus-Intake nötig (kein bestehendes Wissen betroffen)."
        else:
            # Hits exist despite the unknown regulations: do not claim the opposite.
            rec = "Neue Domäne — Corpus-Intake nötig; zusätzlich berührtes bestehendes Wissen gezielt prüfen."
    elif total == 0:
        level, rec = ImpactLevel.NONE, "Wahrscheinlich ignorierbar — betrifft keinen bekannten Wissensbaustein."
    elif len(caps) >= 3 or playbooks or len(obligations) >= 5:
        level, rec = ImpactLevel.HIGH, "Gezielter Review priorisieren — hoher Impact auf bestehendes Wissen."
    else:
        level, rec = ImpactLevel.LOW, "Gezielter Review — geringer, eingegrenzter Impact."

    summary = "Betrifft %d Capabilities, %d Playbooks, %d Patterns, %d Reference Scenarios, %d Obligations; %s." % (
        len(caps), len(playbooks), len(patterns), len(scenarios), len(obligations),
        "NEUE Domäne" if new_domain else "keine neue Domäne",
    )
    return KnowledgePackage(
        document_id=descriptor.document_id,
        classification={
            "regulations": sorted(doc_regs),
            "keywords": sorted(kw),
            "document_type": [descriptor.document_type] if descriptor.document_type else [],
        },
        new_domain=new_domain,
        unknown_regulations=unknown,
        affected_capabilities=caps,
        affected_playbooks=playbooks,
        affected_transition_patterns=patterns,
        affected_reference_scenarios=scenarios,
        affected_obligations=obligations,
        impact_level=level,
        impact_summary=summary,
        recommendation=rec,
    )
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
"""Schemas for Knowledge Intake — classify a new document and assess its IMPACT (no extraction yet).
|
||||||
|
|
||||||
|
Before the parser/draft stages, Intake answers „welche Teile unseres Wissensbestands sind überhaupt
|
||||||
|
betroffen?". It does NOT extract content — it only classifies the document and intersects its signals
|
||||||
|
with an index of the existing knowledge (capabilities, playbooks, transition patterns, reference
|
||||||
|
scenarios, injected obligations) to emit a `KnowledgePackage` (an impact analysis). Deterministic,
|
||||||
|
computed-not-stored, no new corpus, no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class ImpactLevel(str, Enum):
    """How strongly an incoming document touches the existing knowledge."""

    # Touches nothing known -> likely ignorable.
    NONE = "none"
    # Touches a little -> targeted review.
    LOW = "low"
    # Touches a lot -> prioritise review.
    HIGH = "high"
    # References only unknown regulations -> domain intake.
    NEW_DOMAIN = "new_domain"
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentDescriptor(BaseModel):
    """Lightweight classification signals of an incoming document (no content body)."""

    document_id: str
    title: str = ""
    # Issuer, e.g. BSI, ENISA, EU.
    source: str = ""
    # E.g. guidance, faq, regulation, recommendation.
    document_type: str = ""
    # Regulations the document declares it references.
    regulations: List[str] = Field(default_factory=list)
    # Lightweight topic signals, e.g. "sbom".
    keywords: List[str] = Field(default_factory=list)
    product_types: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeIndex(BaseModel):
    """Deterministic index of the EXISTING knowledge that incoming documents are matched against."""

    # Every regulation the corpus knows.
    regulations: List[str] = Field(default_factory=list)
    # capability -> regulations it covers (covers_targets).
    capability_regulations: Dict[str, List[str]] = Field(default_factory=dict)
    # Capabilities that HAVE a playbook.
    playbook_capabilities: List[str] = Field(default_factory=list)
    # pattern_id -> target regulations.
    transition_patterns: Dict[str, List[str]] = Field(default_factory=dict)
    # reference-scenario id -> regulations.
    reference_scenarios: Dict[str, List[str]] = Field(default_factory=dict)
    # regulation -> obligation ids (INJECTED by the caller).
    obligation_index: Dict[str, List[str]] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgePackage(BaseModel):
    """Impact analysis for one document: which existing knowledge it probably touches, and how much."""

    document_id: str
    # Echo of the document's regulations / keywords / document type.
    classification: Dict[str, List[str]] = Field(default_factory=dict)
    new_domain: bool = False
    unknown_regulations: List[str] = Field(default_factory=list)
    affected_capabilities: List[str] = Field(default_factory=list)
    affected_playbooks: List[str] = Field(default_factory=list)
    affected_transition_patterns: List[str] = Field(default_factory=list)
    affected_reference_scenarios: List[str] = Field(default_factory=list)
    affected_obligations: List[str] = Field(default_factory=list)
    impact_level: ImpactLevel = ImpactLevel.NONE
    impact_summary: str = ""
    recommendation: str = ""
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
"""Knowledge Production — deterministically prepare the corpus, then curate it.
|
||||||
|
|
||||||
|
The corpus is not written by hand: the Playbook Draft Generator structures drafts from data the
|
||||||
|
software already owns (Transition Pattern + leverage + injected Execution controls), leaving the
|
||||||
|
practitioner know-how as TODO for expert review. Mirrors the legal pipeline (Parser -> Review).
|
||||||
|
Deterministic, no LLM in core, no new corpus, no new meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import drafts_from_pattern, generate_playbook_draft
|
||||||
|
from .schemas import DraftStatus, PlaybookDraft
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"generate_playbook_draft",
|
||||||
|
"drafts_from_pattern",
|
||||||
|
"PlaybookDraft",
|
||||||
|
"DraftStatus",
|
||||||
|
]
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
"""Knowledge Production — the Playbook Draft Generator (deterministic assembly + expert review).
|
||||||
|
|
||||||
|
Mirrors the legal pipeline (Gesetz -> Parser -> Obligation -> Review) for BreakPilot's OWN knowledge:
|
||||||
|
new Capability -> Registry -> Transition Pattern -> **Playbook Draft Generator** -> Expert Review ->
|
||||||
|
versioned Playbook. The generator does not WRITE playbooks — it STRUCTURES drafts from data the
|
||||||
|
software already owns (a transition/convergence pattern's delta requirement: why_asked, covers_targets,
|
||||||
|
expected_evidence) plus injected Execution controls. The practitioner know-how (tools / process steps /
|
||||||
|
how others do it) is left as an explicit TODO for the expert (or a separate offline-propose step).
|
||||||
|
|
||||||
|
Fully deterministic, NO LLM in the core (deterministic-first: any model enrichment is offline,
|
||||||
|
advisory, never in this assembly). No new corpus, no new meta-model class (freeze v1.0). Python 3.9.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .schemas import DraftStatus, PlaybookDraft
|
||||||
|
|
||||||
|
_SOFT_FIELDS = ["tools", "process_steps", "how_others_do_it"] # practitioner know-how — expert/offline-propose
|
||||||
|
_DISCLAIMER = (
|
||||||
|
"Maschinell assemblierter ENTWURF aus vorhandenen Daten (Transition Pattern + Leverage + "
|
||||||
|
"injizierte Controls). KEINE normative Anforderung; erfordert fachliche Kuratierung (TODO-Felder) "
|
||||||
|
"und Statuswechsel draft_generated -> reviewed -> validated."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_playbook_draft(
    capability_id: str,
    requirement: Optional[Dict[str, Any]] = None,
    control_links: Optional[List[str]] = None,
) -> PlaybookDraft:
    """Assemble a playbook draft for ONE capability from a pattern delta requirement.

    Fields that can be filled from the requirement get a provenance entry naming
    the source actually used; `why` and `expected_evidence` are put on the TODO
    list when empty. The soft fields (practitioner know-how) are always TODO.

    Args:
        capability_id: Capability the draft is for; the title is the id with
            underscores replaced by spaces.
        requirement: A delta-requirement dict. Read keys: ``why_asked`` (fallback
            ``missing_because``), ``covers_targets``, ``expected_evidence``. Null
            values are treated like missing keys.
        control_links: Injected Execution controls; defaults to none.

    Returns:
        A ``PlaybookDraft`` in status ``DRAFT_GENERATED``.
    """
    req = requirement or {}
    asked = req.get("why_asked")
    why = str(asked or req.get("missing_because") or "")
    # `or []`: a key that is present but null must not break iteration.
    closes = sorted({str(t) for t in (req.get("covers_targets") or [])})
    evidence = [str(e) for e in (req.get("expected_evidence") or [])]
    controls = list(control_links or [])

    provenance: Dict[str, str] = {}
    todo: List[str] = []
    if why:
        # Record the key the text really came from, not always `why_asked`.
        provenance["why"] = (
            "transition_pattern:why_asked" if asked else "transition_pattern:missing_because"
        )
    else:
        todo.append("why")
    if closes:
        provenance["closes_regulations"] = "leverage:covers_targets"
    if evidence:
        provenance["expected_evidence"] = "transition_pattern:expected_evidence"
    else:
        todo.append("expected_evidence")
    if controls:
        provenance["typical_controls"] = "execution:control_links"
    todo.extend(_SOFT_FIELDS)  # always expert-owned

    return PlaybookDraft(
        capability_id=capability_id,
        status=DraftStatus.DRAFT_GENERATED,
        title=capability_id.replace("_", " "),
        why=why,
        closes_regulations=closes,
        expected_evidence=evidence,
        typical_controls=controls,
        provenance=provenance,
        todo=todo,
        disclaimer=_DISCLAIMER,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def drafts_from_pattern(
    pattern: Dict[str, Any],
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[PlaybookDraft]:
    """Build one playbook draft per delta requirement of a pattern.

    Requirements without a truthy ``capability`` are skipped. The result keeps the
    order of ``pattern["delta_requirements"]``.

    Args:
        pattern: Transition/convergence pattern dict; only ``delta_requirements`` is read.
        control_links_by_cap: Optional mapping capability id -> injected control links.

    Returns:
        The generated drafts, one per usable delta requirement.
    """
    controls_for = control_links_by_cap or {}
    named_requirements = (
        (str(req.get("capability")), req)
        for req in pattern.get("delta_requirements", [])
        if req.get("capability")
    )
    return [
        generate_playbook_draft(cap_id, req, controls_for.get(cap_id))
        for cap_id, req in named_requirements
    ]
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Schemas for Knowledge Production — deterministic draft assembly + lifecycle.
|
||||||
|
|
||||||
|
The corpus is no longer written by hand: it is deterministically PREPARED from data the software
|
||||||
|
already owns (Capability, Transition Pattern, Controls, Evidence, leverage), then curated by an
|
||||||
|
expert. A `PlaybookDraft` is a machine-assembled skeleton with per-field provenance and an explicit
|
||||||
|
TODO list of what still needs human (or offline-propose) input. No LLM in the deterministic core.
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class DraftStatus(str, Enum):
    """Release status of a draft: the lifecycle from machine-assembled draft to field-proven.

    Mirrors the transition-pattern / playbook maturity, with a machine-assembled
    stage in front.
    """

    # Machine-assembled, NOT yet touched by an expert.
    DRAFT_GENERATED = "draft_generated"
    # An expert is curating it.
    IN_REVIEW = "in_review"
    # Internally reviewed.
    REVIEWED = "reviewed"
    # Confirmed by a domain expert.
    VALIDATED = "validated"
    # Confirmed in the field.
    PROVEN = "proven"
|
||||||
|
|
||||||
|
|
||||||
|
class PlaybookDraft(BaseModel):
    """A deterministically assembled playbook draft for one capability.

    The owned fields (`why`, `closes_regulations`, `expected_evidence`,
    `typical_controls`) are filled from existing data and tracked in `provenance`.
    The practitioner know-how is listed in `todo`, so the expert reviews a draft
    rather than starting from a blank page.
    """

    capability_id: str
    status: DraftStatus = DraftStatus.DRAFT_GENERATED
    title: str = ""
    # From the transition pattern (why_asked / missing_because).
    why: str = ""
    # From leverage (covers_targets).
    closes_regulations: List[str] = Field(default_factory=list)
    # From the transition pattern.
    expected_evidence: List[str] = Field(default_factory=list)
    # Injected from Execution; may be empty.
    typical_controls: List[str] = Field(default_factory=list)
    # field name -> source it was assembled from.
    provenance: Dict[str, str] = Field(default_factory=dict)
    # Fields the expert / offline-propose step must still add.
    todo: List[str] = Field(default_factory=list)
    # States that this is a machine draft requiring expert curation.
    disclaimer: str = ""
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
"""Product Regulatory Navigator — thin missing-facts layer.
|
||||||
|
|
||||||
|
Sits above the CanonicalProductRegulatoryProfile (prefilled from company-profile /
|
||||||
|
ProductWizard) and reports only which facts are still missing + prioritized
|
||||||
|
questions to collect them. It decides which facts are needed, NOT what regulation
|
||||||
|
applies — that stays with the Scope Engine (step 3). No regulation logic, no UI,
|
||||||
|
no Go, no RAG.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import CompletenessSummary, NavigatorResult, apply_answers, navigate
|
||||||
|
from .questions import (
|
||||||
|
QUESTION_CATALOG,
|
||||||
|
AnswerType,
|
||||||
|
NavigatorQuestion,
|
||||||
|
QuestionPriority,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"navigate",
|
||||||
|
"apply_answers",
|
||||||
|
"NavigatorResult",
|
||||||
|
"CompletenessSummary",
|
||||||
|
"NavigatorQuestion",
|
||||||
|
"AnswerType",
|
||||||
|
"QuestionPriority",
|
||||||
|
"QUESTION_CATALOG",
|
||||||
|
]
|
||||||
@@ -0,0 +1,116 @@
|
|||||||
|
"""Product Regulatory Navigator engine — missing-facts only.
|
||||||
|
|
||||||
|
`navigate(profile)` reports which canonical fields are still unknown and the
|
||||||
|
prioritized questions to fill them. `apply_answers(profile, answers)` returns the
|
||||||
|
updated profile. It NEVER decides what applies — that is the Scope Engine (step 3).
|
||||||
|
Pure field-presence checking; no scope-engine import, no regulation evaluation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Type
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import (
|
||||||
|
CanonicalLifecyclePhase,
|
||||||
|
CanonicalProductRegulatoryProfile,
|
||||||
|
EconomicOperatorRole,
|
||||||
|
ProductComponent,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .questions import QUESTION_CATALOG, NavigatorQuestion, QuestionPriority
|
||||||
|
|
||||||
|
# Target fields whose raw answers are converted by calling the listed enum class.
_ENUM_FIELDS: Dict[str, Type[Any]] = dict(
    economic_operator_role=EconomicOperatorRole,
    lifecycle_phase=CanonicalLifecyclePhase,
)
|
||||||
|
|
||||||
|
|
||||||
|
class CompletenessSummary(BaseModel):
    """Counts describing how many catalog facts are present in a profile."""

    total_relevant: int
    answered: int
    missing: int
    # Priority value (e.g. "P0") -> number of missing facts of that priority.
    missing_by_priority: Dict[str, int] = Field(default_factory=dict)
    # True once no P0 fact is missing.
    ready_for_scope: bool
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class NavigatorResult(BaseModel):
    """Missing facts of a profile plus the questions that would collect them."""

    # Canonical target fields that are still unknown.
    missing_facts: List[str] = Field(default_factory=list)
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)
    completeness_summary: CompletenessSummary
|
||||||
|
|
||||||
|
|
||||||
|
def _value(profile: CanonicalProductRegulatoryProfile, dotted: str) -> Any:
|
||||||
|
if "." in dotted:
|
||||||
|
head, tail = dotted.split(".", 1)
|
||||||
|
return getattr(getattr(profile, head), tail, None)
|
||||||
|
return getattr(profile, dotted, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_unknown(profile: CanonicalProductRegulatoryProfile, q: NavigatorQuestion) -> bool:
    """Report whether the fact a question targets is still missing from the profile.

    A fact counts as missing when its value is ``None`` or an empty list; every
    other value (including ``False``) counts as answered.
    """
    current = _value(profile, q.target_field)
    return current is None or (isinstance(current, list) and not current)
|
||||||
|
|
||||||
|
|
||||||
|
def navigate(profile: CanonicalProductRegulatoryProfile) -> NavigatorResult:
    """Report which catalog facts the profile still lacks and the questions to ask.

    Every question of ``QUESTION_CATALOG`` whose target field is unknown is
    collected and sorted by ``q.order()``. The summary counts them per priority
    and marks the profile ready for scope evaluation when no P0 fact is missing.

    Args:
        profile: The canonical profile to inspect; it is not modified.

    Returns:
        A ``NavigatorResult`` with the missing target fields, the ordered
        questions and the completeness summary.
    """
    open_questions = sorted(
        (q for q in QUESTION_CATALOG if _is_unknown(profile, q)),
        key=lambda q: q.order(),
    )

    per_priority: Dict[str, int] = {}
    for question in open_questions:
        bucket = question.priority.value
        per_priority[bucket] = per_priority.get(bucket, 0) + 1
    scope_ready = QuestionPriority.P0.value not in per_priority

    catalog_size = len(QUESTION_CATALOG)
    open_count = len(open_questions)
    answered_count = catalog_size - open_count
    readiness = "ja" if scope_ready else "nein (P0 fehlt)"
    note = "%d von %d Fakten vorhanden; %d offen. Scope-Engine startklar: %s." % (
        answered_count,
        catalog_size,
        open_count,
        readiness,
    )

    return NavigatorResult(
        missing_facts=[question.target_field for question in open_questions],
        suggested_questions=open_questions,
        completeness_summary=CompletenessSummary(
            total_relevant=catalog_size,
            answered=answered_count,
            missing=open_count,
            missing_by_priority=per_priority,
            ready_for_scope=scope_ready,
            note=note,
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce(q: NavigatorQuestion, value: Any) -> Any:
    """Convert a raw answer into the type the question's target field expects.

    Conversion rules, checked in this order:
      * target field listed in ``_ENUM_FIELDS``: call the enum class with the value;
      * target field ``components``: list of ``ProductComponent`` (dicts are
        unpacked into the constructor, existing instances are kept);
      * answer type ``country_list`` / ``multiselect``: a list; a single string
        becomes a one-element list instead of being split into characters;
      * answer type ``bool``: a bool; textual negatives such as ``"false"``,
        ``"0"``, ``"no"``, ``"nein"`` are False (plain ``bool("false")`` would be True);
      * anything else: returned unchanged.

    Args:
        q: The question being answered.
        value: The raw answer.

    Returns:
        The coerced value.
    """
    if q.target_field in _ENUM_FIELDS:
        return _ENUM_FIELDS[q.target_field](value)
    if q.target_field == "components":
        return [c if isinstance(c, ProductComponent) else ProductComponent(**c) for c in (value or [])]

    kind = q.answer_type.value
    if kind in {"country_list", "multiselect"}:
        if isinstance(value, str):
            # list("DE") would yield ["D", "E"]; keep a lone string intact.
            return [value] if value else []
        return list(value or [])
    if kind == "bool":
        if isinstance(value, str):
            # Non-empty strings are always truthy, so interpret the text instead.
            return value.strip().lower() not in {"", "false", "0", "no", "nein", "n", "off"}
        return bool(value)
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def apply_answers(
    profile: CanonicalProductRegulatoryProfile, answers: Dict[str, Any]
) -> CanonicalProductRegulatoryProfile:
    """Return a deep copy of the profile with the given answers written into it.

    Answers whose question id is not in ``QUESTION_CATALOG`` and answers that are
    ``None`` are ignored. Each remaining answer is coerced and assigned to the
    question's target field; a dotted target (``head.tail``) is assigned on the
    attribute named by the part before the first dot.

    Args:
        profile: Source profile; left untouched.
        answers: Mapping question id -> raw answer.

    Returns:
        The updated copy.
    """
    result = profile.model_copy(deep=True)
    catalog = {question.question_id: question for question in QUESTION_CATALOG}
    for answered_id, raw_answer in answers.items():
        question = catalog.get(answered_id)
        if question is None or raw_answer is None:
            continue
        coerced = _coerce(question, raw_answer)
        head, dot, tail = question.target_field.partition(".")
        if dot:
            setattr(getattr(result, head), tail, coerced)
        else:
            setattr(result, head, coerced)
    return result
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
"""Product Regulatory Navigator — question catalog.
|
||||||
|
|
||||||
|
The Navigator is a THIN missing-facts layer over CanonicalProductRegulatoryProfile.
|
||||||
|
It does NOT decide what applies — `regulatory_domains_unblocked` is static metadata
|
||||||
|
(which domains a fact would help the Scope Engine decide later), never an
|
||||||
|
evaluation. No regulation logic, no UI, no Go, no RAG.
|
||||||
|
|
||||||
|
`NavigatorQuestion` is an interaction type, NOT a compliance-meta-model class
|
||||||
|
(architecture freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalLifecyclePhase, EconomicOperatorRole
|
||||||
|
|
||||||
|
|
||||||
|
class AnswerType(str, Enum):
    """Answer formats a NavigatorQuestion can ask for.

    Mixes in ``str``, so each member compares equal to its plain string value.
    """

    BOOL = "bool"
    ENUM = "enum"
    MULTISELECT = "multiselect"
    TEXT = "text"
    COUNTRY_LIST = "country_list"
    COMPONENT_LIST = "component_list"
|
||||||
|
|
||||||
|
|
||||||
|
class QuestionPriority(str, Enum):
    """Urgency tier of a catalog question (P0 is the most urgent)."""

    P0 = "P0"  # blocks scope: EU-vs-not, role, lifecycle, machine/component
    P1 = "P1"  # unblocks a specific domain: RED, Data Act, environment, security
    P2 = "P2"  # refinement: structured BOM
|
||||||
|
|
||||||
|
|
||||||
|
# Numeric rank per priority (lower = more urgent); returned by NavigatorQuestion.order().
_PRIORITY_ORDER = {QuestionPriority.P0: 0, QuestionPriority.P1: 1, QuestionPriority.P2: 2}
|
||||||
|
|
||||||
|
|
||||||
|
class NavigatorQuestion(BaseModel):
    """One catalog entry: a question that fills a single field of the canonical profile."""

    question_id: str
    target_field: str  # dotted path into the canonical profile
    label: str  # the question text shown to the user
    why_needed: str  # short explanation of why the fact is needed
    regulatory_domains_unblocked: List[str] = Field(default_factory=list)  # static metadata, not an evaluation
    answer_type: AnswerType
    options: List[str] = Field(default_factory=list)  # allowed values; the catalog fills this for ENUM questions
    priority: QuestionPriority

    def order(self) -> int:
        """Return the numeric rank of this question's priority (0 for P0, 1 for P1, 2 for P2)."""
        return _PRIORITY_ORDER[self.priority]
|
||||||
|
|
||||||
|
|
||||||
|
# Option lists for the ENUM questions, taken from the canonical profile enums so the
# catalog offers exactly the values those enums define.
_ROLE_OPTIONS = [e.value for e in EconomicOperatorRole]
_PHASE_OPTIONS = [e.value for e in CanonicalLifecyclePhase]
|
||||||
|
|
||||||
|
# The full question catalog, grouped by priority tier (P0, then P1, then P2).
# NOTE(review): "markets" lists the domain id "environment", while the environmental
# questions below use "environment_water" / "environment_air" / "chemicals" — confirm
# which ids the consumer of `regulatory_domains_unblocked` expects.
QUESTION_CATALOG: List[NavigatorQuestion] = [
    # ── P0: block the scope decision itself ───────────────────────────
    NavigatorQuestion(
        question_id="markets",
        target_field="markets",
        label="In welche Märkte / Länder liefern Sie das Produkt?",
        why_needed="Bestimmt EU- vs. Nicht-EU-Anwendbarkeit und nationale Pflichten.",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data", "radio", "emv", "environment"],
        answer_type=AnswerType.COUNTRY_LIST,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="economic_operator_role",
        target_field="economic_operator_role",
        label="Welche Rolle nehmen Sie ein?",
        why_needed="Pflichten hängen von der Rolle ab (Hersteller/Importeur/Händler/Betreiber/Service).",
        regulatory_domains_unblocked=["cyber", "machine_safety", "data"],
        answer_type=AnswerType.ENUM,
        options=_ROLE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="lifecycle_phase",
        target_field="lifecycle_phase",
        label="In welcher Lebenszyklusphase betrachten Sie das Produkt?",
        why_needed="Manche Pflichten greifen nur beim Inverkehrbringen oder in der Wartung.",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.ENUM,
        options=_PHASE_OPTIONS,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_machine",
        target_field="is_machine",
        label="Ist das Produkt eine (vollständige) Maschine?",
        why_needed="Entscheidet die Anwendbarkeit der Maschinenverordnung.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    NavigatorQuestion(
        question_id="is_component",
        target_field="is_component",
        label="Ist das Produkt ein Bauteil / eine unvollständige Maschine?",
        why_needed="Sicherheitsbauteil vs. vollständige Maschine ändert die Pflichten.",
        regulatory_domains_unblocked=["machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P0,
    ),
    # ── P1: unblock one specific domain ───────────────────────────────
    NavigatorQuestion(
        question_id="has_radio_module",
        target_field="has_radio_module",
        label="Enthält das Produkt ein Funkmodul (WLAN/Bluetooth/Mobilfunk)?",
        why_needed="Ein Funkmodul löst die Funkanlagen-Richtlinie (RED) aus.",
        regulatory_domains_unblocked=["radio"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="generates_usage_data",
        target_field="generates_usage_data",
        label="Erzeugt das vernetzte Produkt nutzbare Produkt-/Nutzungsdaten?",
        why_needed="Erzeugte Nutzungsdaten entscheiden über Data-Act-Pflichten.",
        regulatory_domains_unblocked=["data"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="has_security_function",
        target_field="has_security_function",
        label="Hat das Produkt eine dedizierte Security-Funktion (gegen böswillige Akteure)?",
        why_needed="Trennt Security- von Safety-Funktion (CRA vs. MaschinenVO).",
        regulatory_domains_unblocked=["cyber", "machine_safety"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_wastewater",
        target_field="environmental.discharges_to_wastewater",
        label="Gibt das Produkt Stoffe an Wasser / Abwasser ab?",
        why_needed="Abwassereinleitung löst Abwasser-/Gewässerrecht aus.",
        regulatory_domains_unblocked=["environment_water"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_air",
        target_field="environmental.emits_to_air",
        label="Entstehen Luftemissionen (VOC, Staub, Verbrennung, Aerosole)?",
        why_needed="Luftemissionen lösen Immissionsschutzrecht aus.",
        regulatory_domains_unblocked=["environment_air"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    NavigatorQuestion(
        question_id="env_chemicals",
        target_field="environmental.uses_cleaning_chemicals",
        label="Werden Reinigungs-, Desinfektions- oder Biozidmittel verwendet/mitgeliefert?",
        why_needed="Chemikalien lösen REACH/CLP/Detergenzien-/Biozidrecht aus.",
        regulatory_domains_unblocked=["chemicals"],
        answer_type=AnswerType.BOOL,
        priority=QuestionPriority.P1,
    ),
    # ── P2: refinement ────────────────────────────────────────────────
    NavigatorQuestion(
        question_id="components",
        target_field="components",
        label="Aus welchen wesentlichen Komponenten besteht das Produkt?",
        why_needed="Eine strukturierte Stückliste verfeinert komponenten-abgeleitete Pflichten.",
        regulatory_domains_unblocked=["radio", "emv", "environment_water", "chemicals"],
        answer_type=AnswerType.COMPONENT_LIST,
        priority=QuestionPriority.P2,
    ),
]
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
"""Smart Onboarding Advisor — the onboarding runtime step (orchestration over existing engines).
|
||||||
|
|
||||||
|
Turns (company + products + certifications + target) into inferred assumptions, the next best questions
|
||||||
|
(<=5, each self-explaining), the capability delta, top measures, evidence requests and completeness —
|
||||||
|
with NO sales interpretation and NO regulation picking. Orchestrator only: no new engine/registry/
|
||||||
|
meta-model; certificate->capability hypotheses and target requirements are INJECTED.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import advisor_start, apply_answer
|
||||||
|
from .hypotheses import (
|
||||||
|
CapabilityHypothesis,
|
||||||
|
inferred_hypotheses,
|
||||||
|
resolve_for_certifications,
|
||||||
|
)
|
||||||
|
from .observations import (
|
||||||
|
Observation,
|
||||||
|
ObservationType,
|
||||||
|
empirical_confidence,
|
||||||
|
empirical_distribution,
|
||||||
|
reviewed,
|
||||||
|
)
|
||||||
|
from .signals import (
|
||||||
|
ProducedSignal,
|
||||||
|
SignalVocabularyEntry,
|
||||||
|
normalize_signals,
|
||||||
|
)
|
||||||
|
from .silent_intake import (
|
||||||
|
DetectedCapability,
|
||||||
|
IntakeSignal,
|
||||||
|
ProductFact,
|
||||||
|
SignalMapping,
|
||||||
|
SilentIntakeResult,
|
||||||
|
silent_intake,
|
||||||
|
)
|
||||||
|
from .schemas import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
AdvisorResult,
|
||||||
|
InferredAssumption,
|
||||||
|
OnboardingInput,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public surface of the package; every name is re-exported from the submodule imports above.
__all__ = [
    # engine
    "advisor_start",
    "apply_answer",
    # schemas
    "OnboardingInput",
    "AdvisorResult",
    "AdvisorQuestion",
    "AdvisorMeasure",
    "InferredAssumption",
    "RejectedAssumption",
    # hypotheses
    "CapabilityHypothesis",
    "inferred_hypotheses",
    "resolve_for_certifications",
    # observations
    "Observation",
    "ObservationType",
    "empirical_distribution",
    "empirical_confidence",
    "reviewed",
    # silent intake
    "silent_intake",
    "IntakeSignal",
    "SignalMapping",
    "DetectedCapability",
    "ProductFact",
    "SilentIntakeResult",
    # signals
    "ProducedSignal",
    "SignalVocabularyEntry",
    "normalize_signals",
]
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
"""Smart Onboarding Advisor — orchestration over the existing engines (the onboarding runtime step).
|
||||||
|
|
||||||
|
The point of the whole platform, made usable: the user types company + products + certifications +
|
||||||
|
target, and the system does the rest — no sales interpretation, no regulation picking. This is an
|
||||||
|
ORCHESTRATOR, not a new engine: it wires Company 2A (Evidence -> Capability), RS-005 (Capability ->
|
||||||
|
Delta), optimization (Delta -> Roadmap) and completeness into one onboarding flow.
|
||||||
|
|
||||||
|
Three principles it must honour (acceptance criteria):
|
||||||
|
- Multi-cert works; a profile is built from ALL certificates.
|
||||||
|
- relevance(evidence, target): ISO 14001 is NOT falsely relevant to the CRA; ISO 27001/TISAX REDUCE
|
||||||
|
questions but satisfy NOTHING automatically (Welt-1 -> verification_required).
|
||||||
|
- Only the NEXT BEST questions (<= 5), each explaining WHY; every answer updates the profile.
|
||||||
|
|
||||||
|
Certificate -> probable-capability hypotheses and the target's required capabilities are INJECTED (the
|
||||||
|
hypotheses are curated knowledge, not in this code). No corpus loaded here. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from ..company import (
|
||||||
|
CapabilityMappingEntry,
|
||||||
|
Certification,
|
||||||
|
CompanyCapabilityProfile,
|
||||||
|
CompanyContext,
|
||||||
|
build_company_profile,
|
||||||
|
)
|
||||||
|
from ..completeness import assess_completeness
|
||||||
|
from ..optimization import roadmap_from_delta
|
||||||
|
from ..reasoning.enums import Confidence
|
||||||
|
from ..transition_reasoning import (
|
||||||
|
CoverageStatus,
|
||||||
|
TargetRequirement,
|
||||||
|
TransitionContext,
|
||||||
|
TransitionGoal,
|
||||||
|
assess_transition,
|
||||||
|
)
|
||||||
|
from .schemas import (
|
||||||
|
AdvisorMeasure,
|
||||||
|
AdvisorQuestion,
|
||||||
|
AdvisorResult,
|
||||||
|
InferredAssumption,
|
||||||
|
OnboardingInput,
|
||||||
|
RejectedAssumption,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Score contributions used when ranking questions in advisor_start:
# _GAIN is looked up by a request's information_gain value, _RISK by its priority value.
_GAIN = {"high": 3, "medium": 2, "low": 1}
_RISK = {"high": 2, "medium": 1, "low": 0}
|
||||||
|
|
||||||
|
|
||||||
|
def _profile(
    inp: OnboardingInput, cert_hypotheses: Dict[str, List[str]],
    detected: Optional[Sequence[str]] = None,
) -> CompanyCapabilityProfile:
    """Build the company capability profile from held certificates plus detected capabilities.

    Every certificate that the company holds AND that has a non-empty hypothesis list
    contributes its capabilities at MEDIUM confidence. Capabilities in ``detected`` are
    attached to a synthetic ``"__detected__"`` certification at HIGH confidence
    (duplicates removed, first occurrence kept).
    """
    mapping = {}
    held = []
    for cert_id, cap_ids in cert_hypotheses.items():
        # Skip certificates the company does not hold and hypotheses without capabilities.
        if cert_id not in inp.certifications or not cap_ids:
            continue
        mapping[cert_id] = CapabilityMappingEntry(capability_ids=list(cap_ids), confidence=Confidence.MEDIUM)
        held.append(Certification(certification_id=cert_id))

    if detected:  # Silent Pass: concrete findings -> HIGH confidence
        unique_detected = list(dict.fromkeys(detected))
        mapping["__detected__"] = CapabilityMappingEntry(
            capability_ids=unique_detected, confidence=Confidence.HIGH)
        held.append(Certification(certification_id="__detected__"))

    context = CompanyContext(company_id=inp.company or "company", certifications=held)
    return build_company_profile(context, mapping)
|
||||||
|
|
||||||
|
|
||||||
|
def advisor_start(
    inp: OnboardingInput,
    cert_hypotheses: Dict[str, List[str]],
    target_requirements: Sequence[TargetRequirement],
    target_id: str = "target",
    covers_targets: Optional[Dict[str, List[str]]] = None,
    corpus_status: Optional[Dict[str, str]] = None,
    uncertain: Optional[List[Dict[str, str]]] = None,
    detected_capabilities: Optional[Sequence[str]] = None,
    indicative_capabilities: Optional[Sequence[str]] = None,
) -> AdvisorResult:
    """Run the onboarding flow: (silent intake +) certs -> profile -> delta -> ranked questions + measures.

    Pure orchestration; deterministic. `cert_hypotheses` (cert -> probable cap ids), `target_requirements`
    and `detected_capabilities` (from the Silent Knowledge Pass) are INJECTED. Detected capabilities are
    recognised WITHOUT asking -> they shrink the delta and remove questions.

    Args:
        inp: what the user entered (company, certifications, known evidence, target, ...).
        cert_hypotheses: certificate id -> capability ids that certificate probably provides.
        target_requirements: the capabilities the target demands.
        target_id: id used for the transition goal, and as the applicable target when
            ``inp.target`` is empty.
        covers_targets: capability id -> list; its length is the leverage term of the question
            score, and the entries for delta capabilities are handed to ``roadmap_from_delta``.
        corpus_status: forwarded to ``assess_completeness``.
        uncertain: forwarded to ``assess_completeness``.
        detected_capabilities: capability ids detected without asking; fed into the profile.
        indicative_capabilities: partial signals; reported in the result but NOT fed into the profile.

    Returns:
        An ``AdvisorResult`` with at most five questions and at most five measures.
    """
    covers_targets = covers_targets or {}
    # Materialise once: the requirements are consumed twice below, and a one-shot iterable
    # would otherwise be empty on the second pass.
    requirements = list(target_requirements)
    required = {r.capability_id for r in requirements}
    profile = _profile(inp, cert_hypotheses, detected_capabilities)
    auto_detected = sorted(set(detected_capabilities or []) & required)
    auto_detected_set = set(auto_detected)
    # partial/indicative signals raise assumption strength but are NOT fed into the profile -> the gap
    # stays open and is still asked. Surface only those still relevant and NOT already auto-detected.
    indications = sorted((set(indicative_capabilities or []) & required) - auto_detected_set)
    assess = assess_transition(
        TransitionContext(company_id=inp.company or "company", target=TransitionGoal(target_id=target_id)),
        requirements, profile)

    # inferred (Welt-1): per cert, the caps it probably provides that are RELEVANT to this target.
    # Iterate each certificate once (order preserved) so a certificate entered twice does not
    # produce duplicate assumptions.
    inferred: List[InferredAssumption] = []
    rejected: List[RejectedAssumption] = []
    for cert in dict.fromkeys(inp.certifications):
        caps = set(cert_hypotheses.get(cert, []))
        relevant = sorted(caps & required)
        if relevant:
            inferred.append(InferredAssumption(
                certification=cert, capabilities=relevant,
                statement="%s legt %d relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt"
                % (cert, len(relevant))))
        elif caps:
            # The certificate has hypotheses, but none of them is demanded by this target.
            rejected.append(RejectedAssumption(
                certification=cert,
                statement="%s ist für dieses Ziel nicht relevant" % cert,
                reason="relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt"))

    # next best questions (<=5): re-rank the RS-005 requests by info gain + leverage + risk + evidence-gap
    known_ev = set(inp.known_evidence)
    scored = []
    for q in assess.question_requests:
        lev = len(covers_targets.get(q.capability_id, []))
        # +1 when evidence is expected but none of it is already known.
        ev_missing = 1 if (q.expected_evidence and not (set(q.expected_evidence) & known_ev)) else 0
        score = _GAIN.get(q.information_gain.value, 1) + lev + _RISK.get(q.priority.value, 0) + ev_missing
        scored.append((score, q))
    # Highest score first; capability id breaks ties so the order is deterministic.
    scored.sort(key=lambda x: (-x[0], x[1].capability_id))
    next_q = [
        AdvisorQuestion(capability_id=q.capability_id, question_intent=q.question_intent, why=q.reason,
                        information_value=float(s), priority=q.priority.value)
        for s, q in scored[:5]
    ]

    delta = sorted({c.capability_id for c in assess.coverage if c.status == CoverageStatus.MISSING})
    plan = roadmap_from_delta(assess, {c: covers_targets.get(c, []) for c in delta})
    measures = [AdvisorMeasure(capability_id=m.capability_id, leverage=m.leverage, closes=m.covers)
                for m in plan.ranked_measures[:5]]
    evidence = sorted({e for q in assess.question_requests for e in q.expected_evidence})

    applicable = list(inp.target) or [target_id]
    rep = assess_completeness(applicable, corpus_status or {}, uncertain=uncertain or [])
    unsupported = [e.subject for e in rep.exclusions]

    # Auto-detected capabilities are counted separately in the headline, not as "probably covered".
    probably = [c for c in assess.summary.probably_covered if c not in auto_detected_set]
    return AdvisorResult(
        inferred_assumptions=inferred, rejected_assumptions=rejected, auto_detected=auto_detected,
        indications=indications,
        next_best_questions=next_q, capability_delta=delta, top_measures=measures,
        evidence_requests=evidence, unsupported_domains=unsupported,
        completeness_summary=rep.completeness_summary,
        headline="%d Anforderungen erkannt · %d automatisch erkannt (Intake) · %d wahrscheinlich (Zertifikate) · %d zu klären"
        % (len(assess.coverage), len(auto_detected), len(probably), len(next_q)))
|
||||||
|
|
||||||
|
|
||||||
|
def apply_answer(known_capabilities: Sequence[str], capability_id: str, answer: str) -> List[str]:
    """Fold one answer into the known-capability list and return the result as a new list.

    `answer` is expected to be one of "confirmed", "rejected" or "unknown". Only "confirmed"
    changes anything: the capability is appended unless it is already present. Any other
    answer returns the known capabilities as they were. Duplicates in the input are dropped
    (first occurrence wins) and the input sequence itself is never modified.
    """
    unique_known = list(dict.fromkeys(known_capabilities))
    # Nothing to add for non-confirming answers or for a capability that is already known.
    if answer != "confirmed" or capability_id in unique_known:
        return unique_known
    return unique_known + [capability_id]
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"""Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
|
||||||
|
|
||||||
|
Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
|
||||||
|
`capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
|
||||||
|
of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
|
||||||
|
|
||||||
|
`confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
|
||||||
|
(confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
|
||||||
|
long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CapabilityHypothesis(BaseModel):
    """Curated knowledge only. Confidence is NOT stored here — it is computed from the reviewed
    observation stream (see observations.py); a raw answer never changes a hypothesis (review gate)."""

    id: str
    capability: str  # the capability this hypothesis is about
    supported_by: List[str] = Field(default_factory=list)  # certifications that suggest this capability
    relationship: str = "supports"  # supports / partially_supports
    verification_required: bool = True  # Welt-1: never auto-satisfied
    question_intent: str = "verify_existence"
    expected_evidence: List[str] = Field(default_factory=list)
    kind: str = "shared"  # shared / specific
|
||||||
|
|
||||||
|
|
||||||
|
def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> List[CapabilityHypothesis]:
    """Select the hypotheses backed by at least one of the company's certifications.

    A hypothesis matches when its `supported_by` list shares any entry with `certifications`;
    matches are returned in library order. This is the automatic multi-cert merge.
    """
    held = frozenset(certifications)
    matches = []
    for hypothesis in library:
        if not held.isdisjoint(hypothesis.supported_by):
            matches.append(hypothesis)
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> Dict[str, List[str]]:
    """Turn the capability-centric library into the Advisor's `cert -> [capability]` mapping.

    Only held certifications that appear in at least one hypothesis get a key. Capabilities
    are listed once per certificate, in library order; keys are emitted in sorted order so
    the result is deterministic.
    """
    held = set(certifications)
    by_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            capabilities = by_cert.setdefault(cert, [])
            if hypothesis.capability not in capabilities:
                capabilities.append(hypothesis.capability)
    return {cert: by_cert[cert] for cert in sorted(by_cert)}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
"""Observation Model — the empirical learning unit (Task 59a: model BEFORE persistence/API).
|
||||||
|
|
||||||
|
The learning point is NOT the hypothesis, it is the QUESTION. A hypothesis ("ISO 27001 suggests supplier
|
||||||
|
management") produces a question ("Is there a documented supplier-security process?"), and the answer is
|
||||||
|
rarely binary — "yes" / "no" / "partial, only critical suppliers" / "certified but not lived" are very
|
||||||
|
different observations. So the chain is:
|
||||||
|
|
||||||
|
Hypothesis -> Question -> Observation -> (Review) -> Hypothesis
|
||||||
|
|
||||||
|
Two principles (durable):
|
||||||
|
- Richer than confirmed/refuted: an Observation carries an `observation_type` (confirmed / partial /
|
||||||
|
refuted / not_applicable / unknown), a free-text answer, a scope_note ("only critical suppliers"),
|
||||||
|
and whether evidence was uploaded.
|
||||||
|
- REVIEW GATE: a raw answer NEVER changes a hypothesis directly. Only REVIEWED observations calibrate;
|
||||||
|
otherwise the system learns from outliers. Hypotheses stay curated knowledge; confidence is COMPUTED
|
||||||
|
from the reviewed observation stream (keyed by hypothesis id), not stored on the hypothesis.
|
||||||
|
|
||||||
|
This module defines the model + the deterministic statistics it enables (a DISTRIBUTION, not a single
|
||||||
|
%). Persistence (store), aggregation across customers and hypothesis calibration are later tasks
|
||||||
|
(59b/c/d). Pure, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class ObservationType(str, Enum):
    """Classification of an answer; str-valued, so members compare equal to their string values."""

    CONFIRMED = "confirmed"
    PARTIAL = "partial"
    REFUTED = "refuted"
    NOT_APPLICABLE = "not_applicable"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class Observation(BaseModel):
    """One real-onboarding answer to one hypothesis-driven question. The raw empirical unit."""

    hypothesis_id: str  # id of the hypothesis that produced the question
    capability: str = ""  # denormalised for convenient aggregation
    question: str = ""  # the question that was actually asked
    answer: str = ""  # the customer's raw answer (free text)
    observation_type: ObservationType = ObservationType.UNKNOWN
    scope_note: Optional[str] = None  # "only critical suppliers" / "only DE" / "not lived"
    evidence_uploaded: bool = False
    reviewed: bool = False  # the review gate: only reviewed obs calibrate
    reviewed_by: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# Observation types that count as evidence for/against the capability (n/a + unknown do not).
# NOTE(review): not referenced by the functions visible in this module; empirical_confidence
# spells out the same three members explicitly — confirm whether this constant is still needed.
_FOR_AGAINST = (ObservationType.CONFIRMED, ObservationType.PARTIAL, ObservationType.REFUTED)
|
||||||
|
|
||||||
|
|
||||||
|
def empirical_distribution(
    observations: Sequence[Observation], reviewed_only: bool = True
) -> Dict[str, int]:
    """Tally observations by type and return the full distribution.

    The result has one key per `ObservationType` value (zero when nothing was seen), e.g.
    confirmed 61 / partial 31 / refuted 8. With `reviewed_only` (the default) unreviewed
    observations are skipped — the review gate; pass `False` to count everything.
    """
    counts = {kind.value: 0 for kind in ObservationType}
    for obs in observations:
        if reviewed_only and not obs.reviewed:
            continue
        counts[obs.observation_type.value] += 1
    return counts
|
||||||
|
|
||||||
|
|
||||||
|
def empirical_confidence(
    observations: Sequence[Observation], reviewed_only: bool = True
) -> Optional[float]:
    """Compute confidence as (confirmed + 0.5*partial) / (confirmed + partial + refuted).

    Counts come from `empirical_distribution`, so by default only reviewed observations are
    used. `not_applicable` and `unknown` are left out of the denominator. Returns `None` while
    there is no confirmed/partial/refuted observation to count; otherwise the ratio rounded
    to two decimals.
    """
    dist = empirical_distribution(observations, reviewed_only)
    confirmed = dist[ObservationType.CONFIRMED.value]
    partial = dist[ObservationType.PARTIAL.value]
    refuted = dist[ObservationType.REFUTED.value]

    denominator = confirmed + partial + refuted
    if denominator == 0:
        return None
    # A partial observation counts as half a confirmation.
    return round((confirmed + 0.5 * partial) / denominator, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def reviewed(observations: Sequence[Observation]) -> List[Observation]:
    """Return the calibration set: the observations whose `reviewed` flag is set, in input order."""
    return list(filter(lambda obs: obs.reviewed, observations))
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
"""Schemas for the Smart Onboarding Advisor — the onboarding RUNTIME step.
|
||||||
|
|
||||||
|
DTOs only. The Advisor ORCHESTRATES the existing engines (Company 2A, RS-005, optimization,
|
||||||
|
completeness) — no new reasoning engine, no new capability registry, no new meta-model. Welt-1
|
||||||
|
discipline: a certificate yields PROBABLE capabilities (verification required), never "erfüllt".
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class OnboardingInput(BaseModel):
    """What the user supplies at onboarding; every field has an empty default."""

    company: str = ""
    industry: Optional[str] = None
    products: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)
    certifications: List[str] = Field(default_factory=list)
    known_evidence: List[str] = Field(default_factory=list)
    target: List[str] = Field(default_factory=list)  # informational; the delta uses injected requirements
|
||||||
|
|
||||||
|
|
||||||
|
class InferredAssumption(BaseModel):
    """Capabilities a held certificate probably provides for the target (to be verified)."""

    certification: str
    capabilities: List[str] = Field(default_factory=list)  # RELEVANT-to-target caps the cert probably provides
    verification_required: bool = True  # Welt-1: never auto-satisfied
    statement: str = ""  # human-readable summary of the assumption
|
||||||
|
|
||||||
|
|
||||||
|
class RejectedAssumption(BaseModel):
    """An assumption that was discarded, with the reason it does not apply."""

    certification: Optional[str] = None
    statement: str = ""
    reason: str = ""  # e.g. "relevance(evidence, target) = 0"
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorQuestion(BaseModel):
    """One question proposed to the user, with its justification and rank score."""

    capability_id: str
    question_intent: str
    why: str  # every question explains itself
    information_value: float = 0.0  # deterministic rank score
    priority: str = "medium"
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorMeasure(BaseModel):
    """One recommended measure: the capability to build, its leverage and what it closes."""

    capability_id: str
    leverage: int = 0
    closes: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AdvisorResult(BaseModel):
    """Everything the Advisor returns for one onboarding run; all fields default to empty."""

    inferred_assumptions: List[InferredAssumption] = Field(default_factory=list)
    rejected_assumptions: List[RejectedAssumption] = Field(default_factory=list)
    auto_detected: List[str] = Field(default_factory=list)  # detected (concrete artifact): recognised w/o asking
    indications: List[str] = Field(default_factory=list)  # partial signal: raises assumption strength, STILL asked
    next_best_questions: List[AdvisorQuestion] = Field(default_factory=list)  # max 5
    capability_delta: List[str] = Field(default_factory=list)
    top_measures: List[AdvisorMeasure] = Field(default_factory=list)
    evidence_requests: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)
    completeness_summary: str = ""
    headline: str = ""  # one-line summary, e.g. "N erkannt, M wahrscheinlich abgedeckt, K zu klären"
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Signal Producer interface + Normalizer — one signal language, but TWO signal KINDS.
|
||||||
|
|
||||||
|
The platform already HAS scanners (website, repo/code, SBOM, security headers, TLS, SPF/DKIM/DMARC,
|
||||||
|
document analysis, RAG over uploads, product classification). The Silent Pass does not want a
|
||||||
|
WebsiteScanner or a RepoScanner — it wants their UNIFIED output. So every source (a scanner, a PDF
|
||||||
|
parser, a tender parser, an OEM spec, an API, or the user) emits the SAME `ProducedSignal`
|
||||||
|
{signal_id, source_type, kind, confidence, evidence, provenance}, and `normalize_signals` reduces
|
||||||
|
producer-specific ids to ONE canonical signal via a vocabulary (id + aliases + kind) — exactly the
|
||||||
|
Requirement-Source / MCAP / regulation-alias pattern. The Silent Pass then never gets per-scanner logic.
|
||||||
|
|
||||||
|
CRITICAL — a signal is one of two KINDS, and they NEVER substitute for each other:
|
||||||
|
observation = "I SAW X" — a repo with an SBOM, a published security.txt, a risk-assessment PDF.
|
||||||
|
requirement = "someone DEMANDS X" — a tender clause `requires_sbom`, an OEM spec `supplier_requires_psirt`.
|
||||||
|
A demanded SBOM is NOT a present SBOM. `kind` is carried on the canonical VOCABULARY entry (authoritative),
|
||||||
|
so even a mislabelled producer signal cannot collapse the two. The Silent Pass consumes ONLY observations;
|
||||||
|
requirement signals are preserved and feed the required-set / prioritisation later. This Observation-vs-
|
||||||
|
Requirement split is the very one the Requirements Verification Platform rests on: Observations (reality)
|
||||||
|
vs Requirements (targets); their comparison IS the delta. Pure, deterministic, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .silent_intake import IntakeSignal
|
||||||
|
|
||||||
|
|
||||||
|
class ProducedSignal(BaseModel):
    """Shared output shape for every signal producer (scanner, parser, API or the user)."""

    # Identifier exactly as the producer emitted it; may be producer-specific or already canonical.
    signal_id: str
    # Origin category: website / repository / document / product / tender / oem / user / api.
    source_type: str = ""
    # "observation" | "requirement"; an empty string defers the decision to the vocabulary.
    kind: str = ""
    # Producer-supplied confidence; defaults to 1.0.
    confidence: float = 1.0
    # The artifact that was found (already in hand), if any.
    evidence: Optional[str] = None
    # Where the signal came from: url / filename / tender clause / "customer statement".
    provenance: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SignalVocabularyEntry(BaseModel):
    """A canonical signal id together with its aliases and its authoritative KIND label."""

    # Canonical signal id that aliases are reduced to.
    id: str
    # "observation" (something was SEEN) | "requirement" (something is DEMANDED).
    kind: str = "observation"
    # Producer-specific spellings that resolve to `id`.
    aliases: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_signals(
    produced: Sequence[ProducedSignal], vocabulary: Sequence[SignalVocabularyEntry]
) -> List[IntakeSignal]:
    """Reduce heterogeneous producer signals to the canonical IntakeSignal stream (alias resolution).

    Args:
        produced: Signals as emitted by the producers, in the order they should be output.
        vocabulary: Canonical entries (id + aliases + kind) used for resolution.

    Returns:
        One IntakeSignal per produced signal, in input order, carrying the producer's
        source_type, confidence, evidence and provenance.

    Resolution rules:
        * A canonical id ALWAYS resolves to itself. An alias that happens to equal another
          entry's canonical id is ignored, so a mis-curated alias can never remap e.g. a
          requirement id onto an observation entry (or vice versa), whatever the vocabulary order.
        * The vocabulary entry's `kind` is authoritative for known signals; a producer cannot
          relabel a requirement as an observation.
        * Unknown signal ids pass through unchanged and keep the producer-declared kind
          (falling back to "observation" when the producer left it empty).
    """
    alias: Dict[str, str] = {}
    kind_of: Dict[str, str] = {}

    # Pass 1: register every canonical id first so that ids are known before aliases are seen.
    for v in vocabulary:
        alias[v.id] = v.id
        kind_of[v.id] = v.kind

    # Pass 2: register aliases, but never let an alias shadow a canonical id.
    for v in vocabulary:
        for a in v.aliases:
            if a in kind_of:
                continue  # `a` is itself a canonical id -> it must keep resolving to itself
            alias[a] = v.id

    out: List[IntakeSignal] = []
    for p in produced:
        canonical = alias.get(p.signal_id, p.signal_id)
        # Vocabulary kind wins; otherwise the producer's label; otherwise "observation".
        kind = kind_of.get(canonical) or p.kind or "observation"
        out.append(IntakeSignal(
            source=p.source_type, signal=canonical, kind=kind, confidence=p.confidence,
            evidence=p.evidence, provenance=p.provenance))
    return out
|
||||||
@@ -0,0 +1,124 @@
|
|||||||
|
"""Silent Knowledge Pass — recognise everything possible BEFORE asking a single question (Phase 0).
|
||||||
|
|
||||||
|
The Advisor can say "I need 5 answers" but does not yet decide WHAT it can find out by itself. The Silent
|
||||||
|
Pass runs first: from signals that existing scanners/parsers already produce (website, repository,
|
||||||
|
documents, product data) it deterministically derives capabilities the company demonstrably HAS and
|
||||||
|
product facts that drive scope — so every recognised item shrinks the delta and removes a question.
|
||||||
|
|
||||||
|
The customer then experiences "we already recognised 11 of 17 — only these 4 remain" instead of a
|
||||||
|
question wall. This is NOT new architecture: it is one orchestration step in front of the Advisor
|
||||||
|
Company -> Silent Intake -> Company Profile -> Hypotheses -> Delta -> Top Questions
|
||||||
|
All building blocks already exist. SIGNALS are INJECTED (the scanners produce them); the signal->capability
|
||||||
|
map is curated DATA, also injected. Pure, deterministic, no I/O. Python 3.9 compatible.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional, Sequence, Set
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class IntakeSignal(BaseModel):
    """A canonical, producer-agnostic signal consumed by the Silent Pass.

    The same `signal` id may originate from a website, a repository, a PDF, a tender or the
    user; normalize_signals() is what unifies them (see signals.py).
    """

    # source_type: website / repository / document / product / tender / user.
    source: str
    # Canonical signal id, e.g. "sbom_present".
    signal: str
    # "observation" (something was SEEN) | "requirement" (something is DEMANDED).
    kind: str = "observation"
    # Carried over from the producer.
    confidence: float = 1.0
    # The artifact already in hand, if any.
    evidence: Optional[str] = None
    # Where it came from (url / filename / tender clause) — audit trail.
    provenance: str = ""
    # Free text, kept for backwards compatibility.
    detail: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SignalMapping(BaseModel):
    """Curated conclusion for one signal: it yields a capability OR a product fact."""

    # Canonical signal id this mapping applies to.
    signal: str
    # Capability the signal evidences (None when the mapping only yields a product fact).
    capability: Optional[str] = None
    # "detected" (concrete artifact) / "partial" (merely indicative).
    relationship: str = "detected"
    # The artifact found (already in hand, so no upload is needed).
    evidence: Optional[str] = None
    # Product fact key, e.g. "connected_to_internet".
    product_fact: Optional[str] = None
    # Value recorded for `product_fact`.
    fact_value: str = "true"
    # Curated note explaining WHY a mapping is only indicative (mainly for partial mappings).
    rationale: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class DetectedCapability(BaseModel):
    """A capability derived from a signal, with the audit data of the signal that produced it."""

    capability: str
    # "detected" for a concrete artifact; any other value is treated as indicative only.
    relationship: str = "detected"
    # Which signal/source detected it (audit trail).
    source: str = ""
    evidence: Optional[str] = None
    # Carried over from the producing signal.
    confidence: float = 1.0
    # Where the producing signal came from.
    provenance: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class ProductFact(BaseModel):
    """A key/value product fact plus the source that yielded it."""

    # Fact identifier.
    key: str
    # Fact value, stored as a string; defaults to "true".
    value: str = "true"
    # Source of the signal that yielded the fact.
    source: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class SilentIntakeResult(BaseModel):
    """Outcome of the Silent Pass: capabilities, product facts, evidence and seen requirements."""

    detected_capabilities: List[DetectedCapability] = Field(default_factory=list)
    product_facts: List[ProductFact] = Field(default_factory=list)
    evidence_found: List[str] = Field(default_factory=list)
    # Requirement-kind signals: preserved for later use, never counted as present.
    requirements_seen: List[str] = Field(default_factory=list)
    summary: str = ""

    def capability_ids(self) -> List[str]:
        """Return the sorted, de-duplicated ids of capabilities whose relationship is "detected".

        Entries with any other relationship are excluded here and reported by
        indicative_capability_ids() instead.
        """
        concrete = set()
        for entry in self.detected_capabilities:
            if entry.relationship == "detected":
                concrete.add(entry.capability)
        return sorted(concrete)

    def indicative_capability_ids(self) -> List[str]:
        """Return the sorted, de-duplicated ids of capabilities whose relationship is NOT "detected".

        These are backed only by a partial/indicative signal, so the corresponding gap
        stays open.
        """
        indicative = set()
        for entry in self.detected_capabilities:
            if entry.relationship != "detected":
                indicative.add(entry.capability)
        return sorted(indicative)
|
||||||
|
|
||||||
|
|
||||||
|
def silent_intake(
    signals: Sequence[IntakeSignal], signal_map: Sequence[SignalMapping]
) -> SilentIntakeResult:
    """Derive capabilities + product facts from injected scanner signals (deterministic, no questions).

    Args:
        signals: Canonical signals; only those with kind == "observation" can yield
            capabilities or facts. Every other kind is recorded in `requirements_seen`.
        signal_map: Curated mappings, matched to signals by their `signal` id.

    Returns:
        A SilentIntakeResult whose capabilities, facts, evidence and requirements are
        de-duplicated and sorted by key, plus a German summary line with the counts.

    Capability de-duplication: the first mapping that yields a capability wins, EXCEPT that a
    concrete "detected" mapping replaces an earlier non-detected (partial/indicative) entry.
    Without that upgrade a partial signal that merely happened to come first would hide a
    concrete artifact and make the result depend on signal order.
    """
    # Index the curated mappings by signal id, keeping their curated order per signal.
    by_signal: Dict[str, List[SignalMapping]] = {}
    for m in signal_map:
        by_signal.setdefault(m.signal, []).append(m)

    caps: Dict[str, DetectedCapability] = {}
    facts: Dict[str, ProductFact] = {}
    evidence: Set[str] = set()
    requirements: Set[str] = set()
    for s in signals:
        if s.kind != "observation":  # a requirement describes a TARGET, never the present state
            requirements.add(s.signal)  # preserved + visible, but NEVER turned into a capability
            continue
        for m in by_signal.get(s.signal, []):
            if m.capability:
                known = caps.get(m.capability)
                # A concrete artifact outranks a merely indicative entry recorded earlier.
                upgrades = (
                    known is not None
                    and known.relationship != "detected"
                    and m.relationship == "detected"
                )
                if known is None or upgrades:
                    caps[m.capability] = DetectedCapability(
                        capability=m.capability, relationship=m.relationship,
                        source="%s:%s" % (s.source, s.signal), evidence=m.evidence,
                        confidence=s.confidence, provenance=s.provenance)
            if m.evidence:
                evidence.add(m.evidence)
            if m.product_fact:
                # Later signals overwrite earlier ones for the same fact key.
                facts[m.product_fact] = ProductFact(key=m.product_fact, value=m.fact_value, source=s.source)

    detected = [caps[k] for k in sorted(caps)]
    product_facts = [facts[k] for k in sorted(facts)]
    requirements_seen = sorted(requirements)
    n_detected = sum(1 for d in detected if d.relationship == "detected")  # concrete artifacts -> auto-detected
    n_indication = len(detected) - n_detected  # partial -> indication, still asked
    summary = (
        "Stille Vorbefüllung: %d Fähigkeit(en) automatisch erkannt, %d Indikation(en), %d Produktfakt(en), "
        "%d Nachweis(e) bereits vorhanden, %d Anforderung(en) erkannt (nicht als vorhanden gewertet)."
        % (n_detected, n_indication, len(product_facts), len(evidence), len(requirements_seen))
    )
    return SilentIntakeResult(
        detected_capabilities=detected, product_facts=product_facts,
        evidence_found=sorted(evidence), requirements_seen=requirements_seen, summary=summary)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
"""Regulatory Optimization — the Roadmap / Management renderer of the Capability Delta Engine.
|
||||||
|
|
||||||
|
Ranks the OPEN Capability Delta (from RS-005) by regulatory leverage: which measure closes the
|
||||||
|
most regulatory requirements at once. Answers the Geschäftsführer question "Womit anfangen?".
|
||||||
|
Pure, deterministic, computed-not-stored. Consumes the RS-005 delta (acyclic dependency); the
|
||||||
|
delta engine stays hermetic. No new corpus, no new meta-model class (freeze v1.0).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import regulatory_leverage, roadmap_from_delta, select_within_budget
|
||||||
|
from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"regulatory_leverage",
|
||||||
|
"select_within_budget",
|
||||||
|
"roadmap_from_delta",
|
||||||
|
"OptimizationPlan",
|
||||||
|
"RankedMeasure",
|
||||||
|
"BudgetPlan",
|
||||||
|
]
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
"""Regulatory Optimization — the Roadmap / Management RENDERER of the Capability Delta Engine.
|
||||||
|
|
||||||
|
GAP analysis and measure-prioritisation are TWO VIEWS OF THE SAME COMPUTATION. The Capability
|
||||||
|
Delta Engine (`compliance/transition_reasoning`, RS-005) computes Required - Known = the
|
||||||
|
Capability Delta once. Renderers read that ONE delta:
|
||||||
|
- Interview Renderer (missing INFORMATION -> questions) = `TransitionQuestionRequest` (built)
|
||||||
|
- Roadmap / Management Renderer (missing CAPABILITIES -> measures by leverage) = THIS module
|
||||||
|
- Evidence Renderer (missing EVIDENCE -> upload requests) = later
|
||||||
|
There is one truth, not a Gap engine and a separate Roadmap engine.
|
||||||
|
|
||||||
|
A measure (a capability to implement) has *regulatory leverage* = the number of distinct
|
||||||
|
regulatory requirements it closes AT ONCE (e.g. patch management closes a CRA, a MaschinenVO,
|
||||||
|
an IEC 62443 and an ISO 27001 requirement -> leverage 4). The product turns from "you have N
|
||||||
|
obligations" into "of N identified requirements you only need M measures — and these K first".
|
||||||
|
|
||||||
|
Fully deterministic, computed-not-stored, NO new corpus. `regulatory_leverage`/`select_within_budget`
|
||||||
|
are pure math over `capability -> requirements`; `roadmap_from_delta` binds them to the RS-005
|
||||||
|
delta (dependency optimization -> transition_reasoning, acyclic; the delta engine stays hermetic).
|
||||||
|
No new graph/meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
Honesty (Welt-1): the percentages are exact count ratios over the IDENTIFIED requirements from
|
||||||
|
the known patterns — never "% gesetzeskonform". Label outputs as "der identifizierten Anforderungen".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from ..transition_reasoning import CoverageStatus, TransitionAssessment
|
||||||
|
from .schemas import BudgetPlan, OptimizationPlan, RankedMeasure
|
||||||
|
|
||||||
|
|
||||||
|
def _ranked(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]]
) -> List[RankedMeasure]:
    """Build the measure ranking: highest leverage first, ties broken by capability_id ascending.

    A capability whose requirements do not intersect the scope is dropped. When `in_scope`
    is None the scope is every requirement key that appears in `capability_requirements`.
    Each returned measure also carries the running total of requirements closed and that
    total as a fraction of the overall sum of leverage.
    """
    if in_scope is None:
        allowed = set()
        for keys in capability_requirements.values():
            allowed.update(keys)
    else:
        allowed = set(in_scope)

    candidates: List[RankedMeasure] = []
    for cap_id, keys in capability_requirements.items():
        hits = sorted(set(keys) & allowed)
        if hits:
            candidates.append(RankedMeasure(capability_id=cap_id, covers=hits, leverage=len(hits)))
        # otherwise: the capability closes nothing in scope, so it is not a measure here

    ordered = sorted(candidates, key=lambda item: (-item.leverage, item.capability_id))

    grand_total = sum(item.leverage for item in ordered)
    closed_so_far = 0
    for item in ordered:
        closed_so_far += item.leverage
        item.cumulative_requirements = closed_so_far
        item.cumulative_coverage = (closed_so_far / grand_total) if grand_total else 0.0
    return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def regulatory_leverage(
    capability_requirements: Dict[str, List[str]], in_scope: Optional[List[str]] = None
) -> OptimizationPlan:
    """Rank measures by regulatory leverage and report the requirements -> measures compression.

    Args:
        capability_requirements: measure (capability_id) -> the requirement keys it satisfies.
        in_scope: restricts which requirement keys are counted; None means every key seen
            in `capability_requirements`.

    Returns:
        An OptimizationPlan with the sorted scope keys, the ranked measures, the sum of
        their leverage, and a German headline stating those counts and the average leverage.
    """
    ranked = _ranked(capability_requirements, in_scope)

    if in_scope is not None:
        scope_keys = set(in_scope)
    else:
        scope_keys = {key for keys in capability_requirements.values() for key in keys}
    ordered_scope = sorted(scope_keys)

    measure_count = len(ranked)
    requirement_count = sum(item.leverage for item in ranked)
    mean_leverage = (requirement_count / measure_count) if ranked else 0.0

    headline = (
        "%d identifizierte Anforderungen aus %d Regelwerken -> %d Massnahmen (Ø Hebel %.1f)."
        % (requirement_count, len(ordered_scope), measure_count, mean_leverage)
    )
    return OptimizationPlan(
        in_scope_requirements=ordered_scope,
        total_measures=measure_count,
        total_requirements=requirement_count,
        ranked_measures=ranked,
        headline=headline,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def select_within_budget(
    capability_requirements: Dict[str, List[str]],
    budget: int,
    in_scope: Optional[List[str]] = None,
) -> BudgetPlan:
    """Pick the `budget` highest-leverage measures and report how much they close.

    Args:
        capability_requirements: measure (capability_id) -> the requirement keys it satisfies.
        budget: number of measures to select; a negative value selects none, but is
            still echoed unchanged in the returned plan.
        in_scope: optional restriction of the requirement keys counted.

    Returns:
        A BudgetPlan with the selected capability ids (ranked order), the number of
        requirements they close, the overall total, their ratio, and a German headline.
    """
    ranked = _ranked(capability_requirements, in_scope)
    overall = sum(item.leverage for item in ranked)

    take = budget if budget > 0 else 0
    chosen = ranked[:take]

    # The running total stored on the last chosen measure is the amount closed by all of them.
    if chosen:
        closed = chosen[-1].cumulative_requirements
    else:
        closed = 0
    share = (closed / overall) if overall else 0.0

    headline = (
        "Mit den Top-%d Massnahmen (nach regulatorischem Hebel) schliessen Sie %d von %d "
        "identifizierten Anforderungen (%.0f%%)." % (len(chosen), closed, overall, share * 100)
    )
    return BudgetPlan(
        budget=budget,
        selected_capabilities=[item.capability_id for item in chosen],
        requirements_closed=closed,
        total_requirements=overall,
        coverage_ratio=share,
        headline=headline,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def roadmap_from_delta(
    assessment: TransitionAssessment,
    capability_requirements: Dict[str, List[str]],
    in_scope: Optional[List[str]] = None,
    open_statuses: Optional[List[CoverageStatus]] = None,
) -> OptimizationPlan:
    """Render the Roadmap view from a Capability Delta (a `TransitionAssessment`).

    Selects the coverage entries whose status counts as open — only
    CoverageStatus.MISSING unless `open_statuses` is given — looks up each one's requirement
    keys (an empty list when the capability is unknown to `capability_requirements`) and
    ranks the result with regulatory_leverage().
    """
    if open_statuses is None:
        wanted = {CoverageStatus.MISSING}
    else:
        wanted = set(open_statuses)

    open_requirements: Dict[str, List[str]] = {}
    for entry in assessment.coverage:
        if entry.status in wanted:
            open_requirements[entry.capability_id] = capability_requirements.get(entry.capability_id, [])
    return regulatory_leverage(open_requirements, in_scope)
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
"""Schemas for the Regulatory Optimization Engine.
|
||||||
|
|
||||||
|
These DTOs are *derived views* (computed-not-stored): nothing here is persisted; every value
|
||||||
|
is recomputed from the input each call. No new meta-model class, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class RankedMeasure(BaseModel):
    """A single measure (a capability to implement) with its regulatory leverage."""

    capability_id: str
    # The in-scope requirements this measure satisfies.
    covers: List[str] = Field(default_factory=list)
    # Equals len(covers): how many requirements the measure closes at once.
    leverage: int = 0
    # Running total of requirements closed, accumulated in ranked order.
    cumulative_requirements: int = 0
    # cumulative_requirements / total_requirements, in the range 0..1.
    cumulative_coverage: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class OptimizationPlan(BaseModel):
    """Measures ordered by regulatory leverage, greatest regulatory effect first.

    `total_requirements` counts the IDENTIFIED in-scope requirements (the known delta from
    the patterns), not a company's complete legal duties. Ratios derived from it are exact
    count ratios over that identified set and must never be read as a compliance verdict.
    """

    # The distinct requirement keys that were counted.
    in_scope_requirements: List[str] = Field(default_factory=list)
    # Number of distinct measures (delta capabilities).
    total_measures: int = 0
    # Sum of leverage = identified requirements that can be closed.
    total_requirements: int = 0
    ranked_measures: List[RankedMeasure] = Field(default_factory=list)
    # "N identifizierte Anforderungen -> M Massnahmen ..."
    headline: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class BudgetPlan(BaseModel):
    """Answer to the budget question: which K measures to take, and how much they close."""

    # The budget (number of measures) that was requested.
    budget: int = 0
    # Capability ids of the selected measures.
    selected_capabilities: List[str] = Field(default_factory=list)
    requirements_closed: int = 0
    total_requirements: int = 0
    # requirements_closed / total_requirements, in the range 0..1.
    coverage_ratio: float = 0.0
    headline: str = ""
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
"""Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
|
||||||
|
|
||||||
|
For one capability it assembles the full implementation journey (why / closes which regulations /
|
||||||
|
tools / process / evidence / controls) from curated playbook knowledge + regulatory leverage +
|
||||||
|
injected Execution links. `playbooks_for_plan` chains the Optimization Roadmap into per-measure
|
||||||
|
playbooks. Pure, deterministic, computed-not-stored. No new corpus, no new meta-model class
|
||||||
|
(freeze v1.0). Curated content = expert draft, never normative.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .engine import build_playbook, playbooks_for_plan
|
||||||
|
from .schemas import Playbook, PlaybookStep
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"build_playbook",
|
||||||
|
"playbooks_for_plan",
|
||||||
|
"Playbook",
|
||||||
|
"PlaybookStep",
|
||||||
|
]
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
"""Implementation Playbook — the Berater renderer ("wie komme ich dort hin?").
|
||||||
|
|
||||||
|
After the Capability Delta Engine says WHAT is missing and the Optimization renderer says WHICH
|
||||||
|
measure first, the Playbook renderer says HOW to implement it. For one capability it assembles the
|
||||||
|
full journey from three sources:
|
||||||
|
- curated playbook KNOWLEDGE (why / tools / process steps / evidence / how others do it) — the
|
||||||
|
Reasoning Knowledge Acquisition layer under `knowledge/implementation_playbooks/`,
|
||||||
|
- the regulatory LEVERAGE (which regulations a delivered capability closes) — reused from the
|
||||||
|
Optimization renderer,
|
||||||
|
- injected Procedure/Control/Evidence links (Execution-owned; empty until linked).
|
||||||
|
|
||||||
|
Pure, deterministic, computed-not-stored. Chains optimization -> playbook (acyclic). No new corpus,
|
||||||
|
no new meta-model class (freeze v1.0). Python 3.9 compatible.
|
||||||
|
|
||||||
|
The curated content is an EXPERT DRAFT, never a normative requirement. When no playbook knowledge
|
||||||
|
exists for a capability yet, the renderer emits a `status: missing` stub — the honest signal that
|
||||||
|
the bottleneck is CONTENT (Knowledge Acquisition), not software.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from ..optimization import OptimizationPlan
|
||||||
|
from .schemas import Playbook, PlaybookStep
|
||||||
|
|
||||||
|
_MISSING_WHY = "(Playbook-Inhalt fehlt — Knowledge Acquisition offen.)"
|
||||||
|
_DRAFT_DISCLAIMER = (
|
||||||
|
"Kuratiertes Experten-Wissen (Erstentwurf), KEINE normative Anforderung. Tools/Schritte sind "
|
||||||
|
"Empfehlungen, kein Pflichtkatalog; Controls werden aus der Execution-Schicht injiziert."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _steps(raw: Any) -> List[PlaybookStep]:
    """Convert raw step mappings into PlaybookStep objects numbered from 1 in input order.

    A falsy `raw` (None, empty list) yields an empty list. Each item is read via `.get`
    for "title" and "detail"; missing keys become empty strings and values are stringified.
    """
    items = raw or []
    return [
        PlaybookStep(order=position, title=str(item.get("title", "")), detail=str(item.get("detail", "")))
        for position, item in enumerate(items, 1)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def build_playbook(
    capability_id: str,
    knowledge: Optional[Dict[str, Any]] = None,
    closes_regulations: Optional[List[str]] = None,
    control_links: Optional[List[str]] = None,
) -> Playbook:
    """Assemble the implementation journey for a single capability.

    Args:
        capability_id: The capability the playbook is for.
        knowledge: Curated playbook dict. None or empty produces a stub with
            status "missing" and a placeholder rationale.
        closes_regulations: Regulations closed by delivering the capability; de-duplicated
            and sorted, and their count becomes the playbook's leverage.
        control_links: Injected control references; copied as-is, empty when omitted.

    Returns:
        The assembled Playbook. Missing knowledge keys fall back to defaults: the title to
        `capability_id`, the status to "draft", the disclaimer to the module default.
    """
    regulations = sorted(set(closes_regulations or []))
    leverage = len(regulations)

    if not knowledge:
        # No curated content yet: emit the stub so the content gap stays visible.
        return Playbook(
            capability_id=capability_id, title=capability_id, why=_MISSING_WHY,
            closes_regulations=regulations, leverage=leverage, controls=list(control_links or []),
            status="missing", disclaimer=_DRAFT_DISCLAIMER,
        )

    title = str(knowledge.get("title", capability_id))
    rationale = str(knowledge.get("why", ""))
    tooling = list(knowledge.get("tools", []))
    steps = _steps(knowledge.get("process_steps"))
    proof = list(knowledge.get("expected_evidence", []))
    controls = list(control_links or [])
    peers = str(knowledge.get("how_others_do_it", ""))
    state = str(knowledge.get("status", "draft"))
    note = str(knowledge.get("disclaimer", _DRAFT_DISCLAIMER))
    return Playbook(
        capability_id=capability_id,
        title=title,
        why=rationale,
        closes_regulations=regulations,
        leverage=leverage,
        tools=tooling,
        process_steps=steps,
        expected_evidence=proof,
        controls=controls,
        how_others_do_it=peers,
        status=state,
        disclaimer=note,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def playbooks_for_plan(
    plan: OptimizationPlan,
    knowledge_by_cap: Dict[str, Dict[str, Any]],
    top_k: Optional[int] = None,
    control_links_by_cap: Optional[Dict[str, List[str]]] = None,
) -> List[Playbook]:
    """Build one playbook per ranked measure of an OptimizationPlan, in ranked order.

    Args:
        plan: The plan whose `ranked_measures` are walked.
        knowledge_by_cap: Curated playbook dicts keyed by capability id; a capability
            without an entry is passed on as None.
        top_k: Limits the walk to the first `top_k` measures; None means all, and a
            negative value is treated as 0.
        control_links_by_cap: Optional control references keyed by capability id.

    Returns:
        The playbooks, each using its measure's `covers` as the regulations it closes.
    """
    control_index = control_links_by_cap or {}

    if top_k is None:
        chosen = plan.ranked_measures
    else:
        chosen = plan.ranked_measures[: max(0, top_k)]

    playbooks: List[Playbook] = []
    for measure in chosen:
        cap_id = measure.capability_id
        playbooks.append(
            build_playbook(
                cap_id, knowledge_by_cap.get(cap_id),
                closes_regulations=measure.covers, control_links=control_index.get(cap_id),
            )
        )
    return playbooks
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
"""Schemas for the Implementation Playbook renderer.
|
||||||
|
|
||||||
|
A Playbook is a *derived view* (computed-not-stored): it assembles, for one capability, the full
|
||||||
|
"wie komme ich dort hin?" journey from (a) curated playbook KNOWLEDGE, (b) the regulatory leverage
|
||||||
|
(which regulations a delivered capability closes), and (c) injected Procedure/Control/Evidence links
|
||||||
|
(Execution-owned). Nothing here is persisted. No new meta-model class, no graph (freeze v1.0).
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class PlaybookStep(BaseModel):
    """A single step of the recommended way to stand up a capability."""

    # Position of the step within the playbook.
    order: int
    # Short name of the step.
    title: str
    # Optional longer description.
    detail: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class Playbook(BaseModel):
    """The complete implementation journey for ONE capability — the consultant's view.

    It answers, in order: why? -> which regulations does it close? -> which tools? ->
    which processes? -> which evidence? -> which controls? The curated parts (why, tools,
    steps, evidence, how others do it) are an EXPERT DRAFT, not a normative requirement.
    Controls are injected from Execution and may be empty until linked.
    """

    capability_id: str
    title: str = ""
    # Why this is required (regulatory rationale).
    why: str = ""
    # Leverage: the regulations closed once the capability is delivered.
    closes_regulations: List[str] = Field(default_factory=list)
    # Equals len(closes_regulations).
    leverage: int = 0
    # Typical tooling (curated knowledge).
    tools: List[str] = Field(default_factory=list)
    # How to stand the capability up.
    process_steps: List[PlaybookStep] = Field(default_factory=list)
    # Artifacts that prove the capability exists.
    expected_evidence: List[str] = Field(default_factory=list)
    # Control references (injected from Execution; may be empty).
    controls: List[str] = Field(default_factory=list)
    # How other organisations approach it (curated).
    how_others_do_it: str = ""
    # Lifecycle: draft -> reviewed -> validated -> proven.
    status: str = "draft"
    # States that the content is an expert draft, not a normative requirement.
    disclaimer: str = ""
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
"""Product-scope orchestration (step 3).
|
||||||
|
|
||||||
|
Connects the Navigator's fact-gate to the existing reasoning `discover_scope`:
|
||||||
|
decide regulatory scope only once the minimum (P0) facts are present, otherwise
|
||||||
|
return the missing facts. Reuses discover_scope unchanged — no new scope logic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .orchestrator import resolve_product_scope
|
||||||
|
from .schemas import (
|
||||||
|
ProductScopeRequest,
|
||||||
|
ProductScopeResponse,
|
||||||
|
RegulatoryScopeResult,
|
||||||
|
ScopeStatus,
|
||||||
|
UnsupportedDomain,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"resolve_product_scope",
|
||||||
|
"ProductScopeRequest",
|
||||||
|
"ProductScopeResponse",
|
||||||
|
"RegulatoryScopeResult",
|
||||||
|
"UnsupportedDomain",
|
||||||
|
"ScopeStatus",
|
||||||
|
]
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""Product-scope orchestrator (step 3) — gate, then reuse discover_scope.
|
||||||
|
|
||||||
|
THE rule: the Scope Engine decides only once the Navigator has released the
|
||||||
|
minimum facts. If P0 facts are missing, return the missing facts/questions and do
|
||||||
|
NOT run discover_scope. Otherwise project the canonical into the reasoning profile
|
||||||
|
and run the EXISTING `discover_scope` exactly once.
|
||||||
|
|
||||||
|
No new scope rules, no new regulations, no environmental-law evaluation (those
|
||||||
|
domains are surfaced only as unsupported_domains / future_corpus_needed).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from compliance.navigator.engine import navigate
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.profile.to_reasoning import to_reasoning_profile
|
||||||
|
from compliance.reasoning.scope_engine import discover_scope
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
ProductScopeResponse,
|
||||||
|
RegulatoryScopeResult,
|
||||||
|
ScopeStatus,
|
||||||
|
UnsupportedDomain,
|
||||||
|
)
|
||||||
|
|
||||||
|
# environmental trigger field -> (domain, note). Transparency only — not a verdict.
|
||||||
|
_ENV_DOMAINS: List[Tuple[str, str, str]] = [
|
||||||
|
("discharges_to_wastewater", "environment_water", "Abwasser-/Gewässerrecht (z. B. AbwV, WRRL) — noch nicht im Korpus."),
|
||||||
|
("has_cooling_or_spraying_water", "environment_water", "Wasserbezogene Anforderungen — noch nicht im Korpus."),
|
||||||
|
("emits_to_air", "environment_air", "Immissionsschutz-/Luftreinhalterecht (z. B. BImSchG, IED) — noch nicht im Korpus."),
|
||||||
|
("uses_solvents", "environment_air", "Lösemittel-/VOC-Recht (z. B. 31. BImSchV) — noch nicht im Korpus."),
|
||||||
|
("uses_cleaning_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP/Detergenzien/Biozide) — noch nicht im Korpus."),
|
||||||
|
("supplies_chemicals", "chemicals", "Chemikalienrecht (REACH/CLP) — noch nicht im Korpus."),
|
||||||
|
("contains_restricted_substances", "chemicals", "Stoffbeschränkungen (REACH/RoHS) — noch nicht im Korpus."),
|
||||||
|
("creates_waste", "waste", "Abfall-/Entsorgungsrecht (u. a. WEEE) — noch nicht im Korpus."),
|
||||||
|
("consumes_energy_or_water", "energy_resources", "Energie-/Ökodesign-Recht — noch nicht im Korpus."),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _unsupported_domains(profile: CanonicalProductRegulatoryProfile) -> List[UnsupportedDomain]:
    """List the environmental domains the profile triggers, one entry per domain.

    Walks the trigger table in order; a domain is reported for the first
    trigger field whose value is exactly ``True`` (``None``/``False`` do not
    count). Later triggers of an already reported domain are ignored.
    """
    impact = profile.environmental
    reported = {}  # domain -> UnsupportedDomain, insertion order = first hit
    for trigger, domain, note in _ENV_DOMAINS:
        is_set = getattr(impact, trigger) is True
        if is_set and domain not in reported:
            reported[domain] = UnsupportedDomain(domain=domain, trigger=trigger, note=note)
    return list(reported.values())
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_product_scope(profile: CanonicalProductRegulatoryProfile) -> ProductScopeResponse:
    """Gate on the Navigator, then run ``discover_scope`` at most once.

    When the Navigator reports the profile as not ready for scope, the
    response carries status NEEDS_FACTS plus the Navigator's missing facts and
    suggested questions, and ``discover_scope`` is not called. Otherwise the
    canonical profile is projected to the reasoning profile, ``discover_scope``
    runs once, and its result is returned with status RESOLVED together with
    the unsupported environmental domains.
    """
    navigation = navigate(profile)
    summary = navigation.completeness_summary

    if summary.ready_for_scope:
        verdict = discover_scope(to_reasoning_profile(profile))  # the single scope run
        return ProductScopeResponse(
            status=ScopeStatus.RESOLVED,
            completeness_summary=summary,
            regulatory_scope=RegulatoryScopeResult(
                applicable_regulations=verdict.applicable_regulations,
                excluded_regulations=verdict.excluded_regulations,
                uncertain_regulations=verdict.uncertain_regulations,
                unsupported_domains=_unsupported_domains(profile),
                reasoning_summary=verdict.reasoning_summary,
                confidence=verdict.confidence,
            ),
        )

    # Minimum facts missing: ask, do not decide.
    return ProductScopeResponse(
        status=ScopeStatus.NEEDS_FACTS,
        completeness_summary=summary,
        missing_facts=navigation.missing_facts,
        suggested_questions=navigation.suggested_questions,
    )
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
"""Response schemas for the product-scope orchestrator (step 3).
|
||||||
|
|
||||||
|
These are application/API types — NOT compliance-meta-model classes (architecture
|
||||||
|
freeze v1.0 untouched). The scope verdict itself is produced by the existing
|
||||||
|
`discover_scope`; nothing here adds scope rules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.navigator.engine import CompletenessSummary
|
||||||
|
from compliance.navigator.questions import NavigatorQuestion
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
from compliance.reasoning.schemas import (
|
||||||
|
ApplicableRegulation,
|
||||||
|
ExcludedRegulation,
|
||||||
|
UncertainRegulation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ScopeStatus(str, Enum):
    # Outcome of the product-scope orchestration.

    # Minimum (P0) facts are missing -> ask, do not decide.
    NEEDS_FACTS = "needs_facts"
    # Minimum facts are present -> the scope has been decided.
    RESOLVED = "resolved"
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedDomain(BaseModel):
    """A domain the product triggers but the corpus does not yet cover.

    Surfaced for transparency (no false completeness) — NEVER a legal evaluation.
    """

    # Domain identifier, e.g. the second element of a trigger-table entry.
    domain: str
    # Name of the profile field that triggered the domain.
    trigger: str
    # Fixed marker unless a caller overrides it.
    status: str = "future_corpus_needed"
    # Free-text explanation shown alongside the domain.
    note: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryScopeResult(BaseModel):
    # Scope verdict as returned to API callers: three regulation buckets
    # (applicable / excluded / uncertain) plus the unsupported domains.
    applicable_regulations: List[ApplicableRegulation] = Field(default_factory=list)
    excluded_regulations: List[ExcludedRegulation] = Field(default_factory=list)
    uncertain_regulations: List[UncertainRegulation] = Field(default_factory=list)

    # Domains triggered by the product that the corpus does not cover yet.
    unsupported_domains: List[UnsupportedDomain] = Field(default_factory=list)

    # Human-readable summary and overall confidence (MEDIUM when not supplied).
    reasoning_summary: str = ""
    confidence: Confidence = Confidence.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
|
class ProductScopeRequest(BaseModel):
    # Request envelope: the canonical product profile to resolve the scope for.
    product_profile: CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
|
||||||
|
class ProductScopeResponse(BaseModel):
    # Always present: which branch was taken and the Navigator's completeness view.
    status: ScopeStatus
    completeness_summary: CompletenessSummary

    # Filled when status is NEEDS_FACTS; empty lists otherwise.
    missing_facts: List[str] = Field(default_factory=list)
    suggested_questions: List[NavigatorQuestion] = Field(default_factory=list)

    # Filled when status is RESOLVED; None otherwise.
    regulatory_scope: Optional[RegulatoryScopeResult] = None
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
"""Product profile convergence layer.
|
||||||
|
|
||||||
|
ONE canonical product profile (`CanonicalProductRegulatoryProfile`) that the Go
|
||||||
|
gap engine and the Python reasoning engine both project from — so "SPS mit
|
||||||
|
Remote Access" means the same thing everywhere. gap.ProductProfile leads; the
|
||||||
|
reasoning ProductProfile is an adapter/DTO. Types + mappers only — no regulation
|
||||||
|
logic, no UI, no new questions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .canonical import (
|
||||||
|
CanonicalLifecyclePhase,
|
||||||
|
CanonicalProductRegulatoryProfile,
|
||||||
|
CanonicalProductType,
|
||||||
|
ComponentKind,
|
||||||
|
EconomicOperatorRole,
|
||||||
|
EnvironmentalImpact,
|
||||||
|
ProductComponent,
|
||||||
|
)
|
||||||
|
from .from_company_profile import from_company_profile
|
||||||
|
from .from_product_wizard import from_product_wizard
|
||||||
|
from .to_gap import to_gap_profile
|
||||||
|
from .to_reasoning import to_reasoning_profile
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"CanonicalProductRegulatoryProfile",
|
||||||
|
"CanonicalProductType",
|
||||||
|
"EconomicOperatorRole",
|
||||||
|
"CanonicalLifecyclePhase",
|
||||||
|
"ComponentKind",
|
||||||
|
"ProductComponent",
|
||||||
|
"EnvironmentalImpact",
|
||||||
|
"from_product_wizard",
|
||||||
|
"from_company_profile",
|
||||||
|
"to_gap_profile",
|
||||||
|
"to_reasoning_profile",
|
||||||
|
]
|
||||||
@@ -0,0 +1,158 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile — the single semantic product profile.
|
||||||
|
|
||||||
|
Convergence layer (spec 2026-06-26): instead of letting the Go `gap.ProductProfile`
|
||||||
|
and the Python reasoning `ProductProfile` drift, ONE canonical type is the source
|
||||||
|
of truth. The Go gap engine LEADS (it carries real engine logic), so the canonical
|
||||||
|
mirrors gap's field names and adds the Navigator gaps the audit found missing
|
||||||
|
(economic-operator role, radio module, generates_usage_data, lifecycle phase,
|
||||||
|
structured BOM, safety-vs-security split, machine-vs-component) plus a
|
||||||
|
forward-looking Environmental-Impact domain.
|
||||||
|
|
||||||
|
No regulation logic lives here — types only. Mappers live in sibling modules.
|
||||||
|
Python 3.9 compatible (no `|` unions).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalProductType(str, Enum):
    # Product categories; the value set mirrors gap.ProductType.
    SOFTWARE = "software"
    HARDWARE = "hardware"
    IOT = "iot"
    SAAS = "saas"
    EXCHANGE = "exchange"
    MEDICAL_DEVICE = "medical_device"
    MACHINERY = "machinery"
    # Catch-all for anything not covered above.
    OTHER = "other"
|
||||||
|
|
||||||
|
|
||||||
|
class EconomicOperatorRole(str, Enum):
    # CE/CRA economic-operator role — a field gap.ProductProfile does not have.
    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalLifecyclePhase(str, Enum):
    # Lifecycle phase of the product, from development through end of life.
    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
|
||||||
|
|
||||||
|
|
||||||
|
class ComponentKind(str, Enum):
    # Kind of a structured BOM node.

    # drives / thermal
    MOTOR = "motor"
    PUMP = "pump"
    HEATING = "heating"
    COOLING = "cooling"
    # control / I/O
    CONTROLLER = "controller"
    PLC = "plc"
    HMI = "hmi"
    SENSOR = "sensor"
    ACTUATOR = "actuator"
    CAMERA = "camera"
    # connectivity
    NETWORK_INTERFACE = "network_interface"
    RADIO_MODULE = "radio_module"
    # media / energy
    CHEMICAL_DOSING = "chemical_dosing"
    WATER_INLET = "water_inlet"
    WASTEWATER_OUTLET = "wastewater_outlet"
    BATTERY = "battery"
    # fallback
    OTHER = "other"
|
||||||
|
|
||||||
|
|
||||||
|
class ProductComponent(BaseModel):
    """One structured BOM node — these nodes are what later trigger domains."""

    # Display name of the component (required).
    name: str
    # Component category; OTHER when not specified.
    kind: ComponentKind = ComponentKind.OTHER
    # Optional free-text remarks.
    notes: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class EnvironmentalImpact(BaseModel):
    """Forward-looking Umweltmedien-Trigger (own Navigator domain).

    No regulation logic consumes these yet — profile fields only, so the model
    is not blind to wastewater/air/chemicals/waste questions when that domain
    is wired later (AbwV/WRRL/REACH/CLP/IED/BImSchG ...).
    """

    # Every flag is tri-state: True / False / None (not answered).
    discharges_to_wastewater: Optional[bool] = None
    uses_cleaning_chemicals: Optional[bool] = None
    supplies_chemicals: Optional[bool] = None
    emits_to_air: Optional[bool] = None
    uses_solvents: Optional[bool] = None
    creates_waste: Optional[bool] = None
    contains_restricted_substances: Optional[bool] = None
    consumes_energy_or_water: Optional[bool] = None
    has_cooling_or_spraying_water: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
|
class CanonicalProductRegulatoryProfile(BaseModel):
    # The single canonical product profile. Types only; every field has a
    # default, so an empty profile can be constructed without arguments.

    # ---- identity ----------------------------------------------------------
    name: str = ""
    description: str = ""
    product_type: Optional[CanonicalProductType] = None
    product_profile_id: Optional[str] = None
    tenant_id: Optional[str] = None
    iace_project_id: Optional[str] = None

    # ---- list fields shared with the gap profile ---------------------------
    technologies: List[str] = Field(default_factory=list)
    data_processing: List[str] = Field(default_factory=list)
    # A real list of markets; it is never pre-filled with a hardcoded ['EU'].
    markets: List[str] = Field(default_factory=list)
    existing_certifications: List[str] = Field(default_factory=list)
    applied_norms: List[str] = Field(default_factory=list)

    # ---- product / current-state flags shared with the gap profile ---------
    # Tri-state booleans: None means "unknown", not False.
    connected_to_internet: Optional[bool] = None
    has_software_updates: Optional[bool] = None
    uses_ai: Optional[bool] = None
    processes_personal_data: Optional[bool] = None
    is_critical_infra_supplier: Optional[bool] = None
    has_risk_assessment: Optional[bool] = None
    has_technical_file: Optional[bool] = None
    has_operating_manual: Optional[bool] = None
    has_sbom: Optional[bool] = None
    has_vuln_management: Optional[bool] = None
    has_update_mechanism: Optional[bool] = None
    has_incident_response: Optional[bool] = None
    has_supply_chain_mgmt: Optional[bool] = None
    ce_marking_since: Optional[str] = None
    product_age: Optional[str] = None

    # ---- Navigator-gap fields added by the 2026-06-26 audit ----------------
    economic_operator_role: Optional[EconomicOperatorRole] = None
    has_radio_module: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    lifecycle_phase: Optional[CanonicalLifecyclePhase] = None
    components: List[ProductComponent] = Field(default_factory=list)
    has_safety_function: Optional[bool] = None
    safety_function_description: Optional[str] = None
    # Kept separate from has_safety_function (safety vs. security split).
    has_security_function: Optional[bool] = None
    has_remote_access: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None

    # ---- company / market context (NIS2 + scope; from the company profile) -
    b2b_or_b2c: Optional[str] = None
    sector_industry: Optional[str] = None
    company_size: Optional[str] = None
    primary_jurisdiction: Optional[str] = None

    # ---- AI context (classification itself stays with ai-act/ucca) ---------
    ai_integration_type: List[str] = Field(default_factory=list)
    human_oversight_level: Optional[str] = None

    # ---- forward-looking environmental domain ------------------------------
    environmental: EnvironmentalImpact = Field(default_factory=EnvironmentalImpact)
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
"""company-profile -> CanonicalProductRegulatoryProfile (prefill, acceptance #2).
|
||||||
|
|
||||||
|
Pulls master data (industry, business model, size, markets) and the conditional
|
||||||
|
`machine_builder` block (camelCase JSONB keys, defined frontend-side) so the user
|
||||||
|
re-answers nothing. The machineBuilder block is the richest product/safety/
|
||||||
|
connectivity source — note it is industry-gated in the UI, so a prefill may find
|
||||||
|
it empty; that is fine (fields stay None = unknown).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
_EU_MEMBER_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
|
||||||
|
|
||||||
|
|
||||||
|
def _markets(p: Dict[str, Any], mb: Dict[str, Any]) -> List[str]:
|
||||||
|
out: List[str] = []
|
||||||
|
for source in (p.get("target_markets"), mb.get("exportMarkets"), [p.get("primary_jurisdiction")], [p.get("headquarters_country")]):
|
||||||
|
for m in source or []:
|
||||||
|
if m and m not in out:
|
||||||
|
out.append(m)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _is_machine(mb: Dict[str, Any]) -> Any:
|
||||||
|
types = mb.get("productTypes")
|
||||||
|
if types:
|
||||||
|
return True
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def from_company_profile(profile: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Prefill a canonical product profile from a company-profile dict.

    Master data is read from the top level of ``profile`` (snake_case keys);
    product, safety and connectivity facts are read from its
    ``machine_builder`` block (camelCase keys). A missing or empty block is
    treated as ``{}``, so the corresponding fields stay None. Empty strings
    for text fields are normalised to None (``description`` to "").
    """
    builder = profile.get("machine_builder") or {}

    # The machine-builder answer wins; fall back to the top-level flag only
    # when the block does not answer the AI question at all.
    ai_flag = builder.get("containsAI")
    if ai_flag is None:
        ai_flag = profile.get("uses_ai")

    return CanonicalProductRegulatoryProfile(
        # identity
        description=builder.get("productDescription") or "",
        # company / market context
        sector_industry=profile.get("industry") or None,
        b2b_or_b2c=profile.get("business_model") or None,
        company_size=profile.get("company_size") or None,
        primary_jurisdiction=profile.get("primary_jurisdiction") or None,
        markets=_markets(profile, builder),
        # AI context
        uses_ai=ai_flag,
        ai_integration_type=list(builder.get("aiIntegrationType") or []),
        human_oversight_level=builder.get("humanOversightLevel") or None,
        # product facts from the machine-builder block
        has_embedded_software=builder.get("containsFirmware"),
        has_safety_function=builder.get("hasSafetyFunction"),
        safety_function_description=builder.get("safetyFunctionDescription") or None,
        has_remote_access=builder.get("hasRemoteAccess"),
        connected_to_internet=builder.get("isNetworked"),
        has_software_updates=builder.get("hasOTAUpdates"),
        has_risk_assessment=builder.get("hasRiskAssessment"),
        is_machine=_is_machine(builder),
        is_critical_infra_supplier=builder.get("criticalSectorClients"),
    )
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
"""ProductWizard payload -> CanonicalProductRegulatoryProfile (lossless).
|
||||||
|
|
||||||
|
The gap-analysis ProductWizard POSTs exactly the gap.ProductProfile JSON shape
|
||||||
|
(see admin-compliance/.../ProductWizard.tsx handleSubmit). This mapper copies
|
||||||
|
every gap field verbatim so that `to_gap_profile(from_product_wizard(p))`
|
||||||
|
reproduces the gap subset of `p` byte-for-byte (acceptance #1). New Navigator
|
||||||
|
fields the wizard does not ask stay None.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
|
||||||
|
|
||||||
|
|
||||||
|
def _as_product_type(value: Any) -> Optional[CanonicalProductType]:
    """Convert a raw payload value to a CanonicalProductType, or None if it is not one."""
    try:
        product_type = CanonicalProductType(value)
    except ValueError:
        # Unknown or missing value: leave the type undetermined.
        product_type = None
    return product_type
|
||||||
|
|
||||||
|
|
||||||
|
def from_product_wizard(payload: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Map a ProductWizard payload onto the canonical profile, field by field.

    Every gap field is copied under the same key. List fields are copied into
    fresh lists (missing/None becomes an empty list), boolean and optional
    string fields are passed through unchanged (missing becomes None), and an
    unrecognised ``product_type`` becomes None. Fields the payload does not
    carry keep their model defaults.

    Args:
        payload: the wizard's JSON body as a dict.

    Returns:
        The populated canonical profile.
    """
    g = payload.get

    def text(key: str) -> Any:
        # dict.get's default only covers a MISSING key; an explicit JSON null
        # would reach the non-Optional str field as None and fail validation.
        value = g(key)
        return "" if value is None else value

    return CanonicalProductRegulatoryProfile(
        name=text("name"),
        description=text("description"),
        product_type=_as_product_type(g("product_type")),
        technologies=list(g("technologies") or []),
        data_processing=list(g("data_processing") or []),
        markets=list(g("markets") or []),
        existing_certifications=list(g("existing_certifications") or []),
        applied_norms=list(g("applied_norms") or []),
        connected_to_internet=g("connected_to_internet"),
        has_software_updates=g("has_software_updates"),
        uses_ai=g("uses_ai"),
        processes_personal_data=g("processes_personal_data"),
        is_critical_infra_supplier=g("is_critical_infra_supplier"),
        has_risk_assessment=g("has_risk_assessment"),
        has_technical_file=g("has_technical_file"),
        has_operating_manual=g("has_operating_manual"),
        has_sbom=g("has_sbom"),
        has_vuln_management=g("has_vuln_management"),
        has_update_mechanism=g("has_update_mechanism"),
        has_incident_response=g("has_incident_response"),
        has_supply_chain_mgmt=g("has_supply_chain_mgmt"),
        ce_marking_since=g("ce_marking_since"),
        product_age=g("product_age"),
    )
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile -> gap.ProductProfile JSON shape.
|
||||||
|
|
||||||
|
Emits exactly the keys the Go gap engine already consumes (gap/models.go json
|
||||||
|
tags), so the gap engine runs UNCHANGED — the canonical is a superset and gap is
|
||||||
|
its lossless projection. Canonical-only fields (role/radio/components/...) are
|
||||||
|
intentionally not emitted here; they reach the reasoning side via to_reasoning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile
|
||||||
|
|
||||||
|
|
||||||
|
def to_gap_profile(c: CanonicalProductRegulatoryProfile) -> Dict[str, Any]:
    """Project the canonical profile onto the gap.ProductProfile JSON shape.

    The result has a fixed key order: identity, list fields (copied), boolean
    flags (``None`` collapses to ``False``), then the two optional strings
    (``None`` becomes ""). A missing product type is emitted as "".
    """
    list_keys = (
        "technologies",
        "data_processing",
        "markets",
        "existing_certifications",
        "applied_norms",
    )
    flag_keys = (
        "connected_to_internet",
        "has_software_updates",
        "uses_ai",
        "processes_personal_data",
        "is_critical_infra_supplier",
        "has_risk_assessment",
        "has_technical_file",
        "has_operating_manual",
        "has_sbom",
        "has_vuln_management",
        "has_update_mechanism",
        "has_incident_response",
        "has_supply_chain_mgmt",
    )
    text_keys = ("ce_marking_since", "product_age")

    gap: Dict[str, Any] = {
        "name": c.name,
        "description": c.description,
        "product_type": c.product_type.value if c.product_type else "",
    }
    for key in list_keys:
        gap[key] = list(getattr(c, key))
    for key in flag_keys:
        # Tri-state -> plain bool: unknown is reported as False.
        gap[key] = bool(getattr(c, key))
    for key in text_keys:
        value = getattr(c, key)
        gap[key] = "" if value is None else value
    return gap
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
"""CanonicalProductRegulatoryProfile -> reasoning ProductProfile (adapter/DTO).
|
||||||
|
|
||||||
|
The reasoning engine stays the consumer, never the source of truth (spec): the
|
||||||
|
canonical leads, this projects it into the Python reasoning ProductProfile so the
|
||||||
|
Reasoning engine and the Go gap engine run off ONE semantic profile (acceptance
|
||||||
|
#10). AI classification is NOT done here — only `uses_ai` is forwarded; risk
|
||||||
|
classification stays delegated to ai-act/ucca (acceptance #3).
|
||||||
|
|
||||||
|
This is the ONLY one-way coupling profile -> reasoning; reasoning never imports
|
||||||
|
profile, so the reasoning layer stays hermetic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import ManufacturerRole, MarketModel, ProductLifecyclePhase
|
||||||
|
from compliance.reasoning.schemas import ProductProfile
|
||||||
|
|
||||||
|
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
|
||||||
|
|
||||||
|
# Product types that count as a software signal on their own.
_SOFTWARE_TYPES = {
    CanonicalProductType.SOFTWARE,
    CanonicalProductType.SAAS,
    CanonicalProductType.IOT,
}

# Technology tags that count as a software signal.
_SOFTWARE_TECH = {
    "ai",
    "api",
    "database",
    "encryption",
    "ota_updates",
    "cloud",
    "blockchain",
}
|
||||||
|
_EU_HINTS = {"DE", "AT", "FR", "IT", "NL", "LU", "LI", "EU", "EWR", "EEA", "DACH"}
|
||||||
|
# Business-model string from the canonical profile -> reasoning MarketModel.
# Keys not listed here map to no model at all.
_B2X = {
    "B2B": MarketModel.B2B,
    "B2C": MarketModel.B2C,
    "B2B_B2C": MarketModel.BOTH,
    "B2B2C": MarketModel.BOTH,
}
|
||||||
|
|
||||||
|
|
||||||
|
def _or_none(*values: Optional[bool]) -> Optional[bool]:
|
||||||
|
"""True if any value is truthy; None if all are None/absent; else False."""
|
||||||
|
if any(v is True for v in values):
|
||||||
|
return True
|
||||||
|
if all(v is None for v in values):
|
||||||
|
return None
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _has_software(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Derive the tri-state "has software" signal of a canonical profile.

    Combines the explicit flags (embedded software, software updates, AI) with
    two positive-only signals: a software-like product type and a
    software-related technology tag. Those two can only contribute True,
    never False.
    """
    signals = [c.has_embedded_software, c.has_software_updates, c.uses_ai]
    signals.append(True if c.product_type in _SOFTWARE_TYPES else None)
    signals.append(True if (set(c.technologies) & _SOFTWARE_TECH) else None)
    return _or_none(*signals)
|
||||||
|
|
||||||
|
|
||||||
|
def _eu_market(markets: List[str]) -> Optional[bool]:
    """Tell whether the market list contains an EU hint.

    An empty list yields None (unknown); otherwise True when at least one
    entry is a known EU hint and False when none is.
    """
    if not markets:
        return None
    return not _EU_HINTS.isdisjoint(markets)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_radio(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
|
||||||
|
if c.has_radio_module is not None:
|
||||||
|
return c.has_radio_module
|
||||||
|
if any(comp.kind.value == "radio_module" for comp in c.components):
|
||||||
|
return True
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def to_reasoning_profile(c: CanonicalProductRegulatoryProfile) -> ProductProfile:
    """Project the canonical profile into the reasoning ProductProfile DTO.

    Enum-typed canonical fields are converted by value; explicit tri-state
    answers are forwarded as they are. Where the canonical profile has no
    explicit answer, a few fields get a positive-only fallback: MACHINERY
    product type for ``is_machine``, a "telemetry" entry in ``data_processing``
    for ``generates_usage_data`` and a "cloud" technology tag for
    ``has_cloud_connection``.
    """
    # Enum conversions by value (None stays None).
    role = None
    if c.economic_operator_role:
        role = ManufacturerRole(c.economic_operator_role.value)

    phase = None
    if c.lifecycle_phase:
        phase = ProductLifecyclePhase(c.lifecycle_phase.value)

    market_model = None
    if c.b2b_or_b2c:
        market_model = _B2X.get(c.b2b_or_b2c)

    # Explicit answers win; the fallbacks can only turn "unknown" into True.
    machine_flag = c.is_machine
    if machine_flag is None and c.product_type == CanonicalProductType.MACHINERY:
        machine_flag = True

    usage_data_flag = c.generates_usage_data
    if usage_data_flag is None and "telemetry" in c.data_processing:
        usage_data_flag = True

    cloud_flag = True if "cloud" in c.technologies else None
    type_list = [c.product_type.value] if c.product_type else []

    return ProductProfile(
        product_name=c.name or "Produkt",
        product_profile_id=c.product_profile_id,
        manufacturer_role=role,
        product_type=type_list,
        has_software=_has_software(c),
        has_embedded_software=c.has_embedded_software,
        has_remote_access=c.has_remote_access,
        has_cloud_connection=cloud_flag,
        has_ai_functionality=c.uses_ai,
        has_radio_module=_has_radio(c),
        has_safety_function=c.has_safety_function,
        generates_usage_data=usage_data_flag,
        is_machine=machine_flag,
        is_component=c.is_component,
        is_spare_part=c.is_spare_part,
        eu_market=_eu_market(c.markets),
        b2b_or_b2c=market_model,
        lifecycle_phase=phase,
        company_size=c.company_size,
        sector=c.sector_industry,
    )
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
"""Regulatory Change Intelligence (RCI) — delta layer over the product-first map.
|
||||||
|
|
||||||
|
Answers "what changes relative to my existing Regulatory Map?" — NOT "what does
|
||||||
|
the new law say in general". Snapshot the pipeline into a ComplianceBaseline, then
|
||||||
|
assess a (simulated/provided) RegulatoryChange into per-obligation deltas + a
|
||||||
|
management ChangeImpactSummary. Read/reasoning only — no UI, no ingestion, no RAG,
|
||||||
|
no new regulations/controls, no legal evaluation outside the stored map.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .baseline import create_baseline
|
||||||
|
from .delta_engine import assess_change
|
||||||
|
from .schemas import (
|
||||||
|
ChangeAssessment,
|
||||||
|
ChangeImpactSummary,
|
||||||
|
ChangeType,
|
||||||
|
ComplianceBaseline,
|
||||||
|
DeltaType,
|
||||||
|
ObligationDelta,
|
||||||
|
RegulatoryChange,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_baseline",
|
||||||
|
"assess_change",
|
||||||
|
"ComplianceBaseline",
|
||||||
|
"RegulatoryChange",
|
||||||
|
"ObligationDelta",
|
||||||
|
"ChangeImpactSummary",
|
||||||
|
"ChangeAssessment",
|
||||||
|
"DeltaType",
|
||||||
|
"ChangeType",
|
||||||
|
]
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Snapshot the current product-first pipeline into a ComplianceBaseline.
|
||||||
|
|
||||||
|
This is the ONLY place RCI runs the pipeline — to freeze a point-in-time map +
|
||||||
|
registry-linked obligations + their required evidence. Everything downstream
|
||||||
|
(delta computation) works purely against this snapshot, never re-evaluating.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.profile.to_reasoning import to_reasoning_profile
|
||||||
|
from compliance.reasoning.obligation_engine import derive_obligations
|
||||||
|
from compliance.regulatory_map.renderer import render_regulatory_map
|
||||||
|
|
||||||
|
from .schemas import ComplianceBaseline
|
||||||
|
|
||||||
|
|
||||||
|
def create_baseline(
    profile: CanonicalProductRegulatoryProfile,
    evidence_refs: Optional[Dict[str, List[str]]] = None,
    baseline_id: str = "baseline",
    created_at: Optional[str] = None,
) -> ComplianceBaseline:
    """Freeze the current pipeline output for ``profile`` into a baseline.

    Renders the regulatory map and derives the applicable obligations once.
    Only obligations with a registry anchor are recorded, each with a copy of
    its required evidence.

    Args:
        profile: canonical product profile to snapshot.
        evidence_refs: evidence already present, keyed by obligation id;
            copied into the baseline (None is treated as empty).
        baseline_id: identifier stored on the baseline.
        created_at: timestamp string stored as given.
    """
    map_snapshot = render_regulatory_map(profile)
    derived = derive_obligations(to_reasoning_profile(profile)).applicable_obligations

    # Obligations without a registry anchor do not enter the baseline.
    anchored = [ob for ob in derived if ob.registry_anchor]
    obligation_ids: List[str] = [ob.obligation_id for ob in anchored]
    evidence_required: Dict[str, List[str]] = {
        ob.obligation_id: list(ob.required_evidence) for ob in anchored
    }

    return ComplianceBaseline(
        baseline_id=baseline_id,
        product_profile_snapshot=profile,
        regulatory_map_snapshot=map_snapshot,
        applicable_obligations=obligation_ids,
        obligation_evidence_required=evidence_required,
        evidence_refs=dict(evidence_refs or {}),
        created_at=created_at,
    )
|
||||||
@@ -0,0 +1,114 @@
|
|||||||
|
"""RCI delta engine — assess a RegulatoryChange against a ComplianceBaseline.
|
||||||
|
|
||||||
|
Answers "what changes relative to my existing Map?" deterministically, working
|
||||||
|
ONLY against the stored baseline (no re-evaluation of scope, no new legal
|
||||||
|
assessment outside the map). Per-obligation classification -> ObligationDelta;
|
||||||
|
aggregate -> ChangeImpactSummary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from compliance.reasoning.enums import Confidence
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
ChangeAssessment,
|
||||||
|
ChangeImpactSummary,
|
||||||
|
ChangeType,
|
||||||
|
ComplianceBaseline,
|
||||||
|
DeltaType,
|
||||||
|
ObligationDelta,
|
||||||
|
RegulatoryChange,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Delta types for which missing evidence is computed.
_ACTION = {
    DeltaType.NEW,
    DeltaType.CHANGED,
    DeltaType.NEEDS_REVIEW,
}
|
||||||
|
|
||||||
|
|
||||||
|
def _classify(
    in_base: bool, has_ev: bool, change_type: ChangeType, rel_app: bool, rel_unc: bool
) -> Tuple[DeltaType, str, Confidence]:
    """Classify one obligation touched by a regulatory change.

    Args:
        in_base: the obligation is part of the baseline.
        has_ev: the baseline holds evidence for the obligation.
        change_type: kind of regulatory change.
        rel_app: the change touches a regulation the map lists as applicable.
        rel_unc: the change touches a regulation the map lists as uncertain.

    Returns:
        ``(delta type, user-facing reason, confidence)``.
    """
    if not rel_app:
        if not rel_unc:
            # No regulation of the map is touched at all.
            return DeltaType.NOT_APPLICABLE, "Die Änderung betrifft kein Regelwerk Ihrer Map.", Confidence.HIGH
        # Only uncertain regulations are touched: applicability comes first.
        return (
            DeltaType.NEEDS_REVIEW,
            "Betrifft ein für Ihr Produkt noch UNSICHERES Regelwerk — erst Anwendbarkeit klären.",
            Confidence.LOW,
        )

    # From here on an applicable regulation is affected.
    if change_type == ChangeType.REPEAL:
        if not in_base:
            return DeltaType.NOT_APPLICABLE, "Aufhebung betrifft keine Ihrer bestehenden Pflichten.", Confidence.HIGH
        return DeltaType.REMOVED, "Regelwerk/Pflicht aufgehoben — entfällt für Ihr Produkt.", Confidence.HIGH

    if not in_base:
        return DeltaType.NEW, "Neue Pflicht durch die Änderung — bisher nicht in Ihrer Map.", Confidence.MEDIUM

    if change_type != ChangeType.GUIDANCE_UPDATE:
        return (
            DeltaType.CHANGED,
            "Bestehende Pflicht inhaltlich geändert — Umsetzung und Nachweis prüfen.",
            Confidence.MEDIUM,
        )

    # Guidance update on an existing obligation: evidence decides.
    if has_ev:
        return (
            DeltaType.ALREADY_COVERED,
            "Bestehende Pflicht mit vorhandenen Nachweisen — Leitlinien-Update vermutlich abgedeckt.",
            Confidence.MEDIUM,
        )
    return DeltaType.NEEDS_REVIEW, "Bestehende Pflicht ohne Nachweis — Leitlinien-Update prüfen.", Confidence.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
|
def assess_change(baseline: ComplianceBaseline, change: RegulatoryChange) -> ChangeAssessment:
    """Compute the per-obligation delta of ``change`` against ``baseline``.

    Args:
        baseline: stored snapshot holding the regulatory map, the applicable
            obligations and the required/present evidence per obligation.
        change: the regulatory change to assess.

    Returns:
        A ``ChangeAssessment`` with one ``ObligationDelta`` per entry of
        ``change.affected_obligations`` (in input order) and the derived summary.
    """
    reg_map = baseline.regulatory_map_snapshot
    applicable_ids = {entry.regulation_id for entry in reg_map.applicable_regulations}
    uncertain_ids = {entry.regulation_id for entry in reg_map.uncertain_regulations}
    known_obligations = set(baseline.applicable_obligations)

    # Relevance is decided once per change, at regulation level.
    touched_regs = set(change.affected_regulations)
    rel_app = not touched_regs.isdisjoint(applicable_ids)
    rel_unc = not touched_regs.isdisjoint(uncertain_ids)

    deltas: List[ObligationDelta] = []
    for obligation_id in change.affected_obligations:
        on_file = baseline.evidence_refs.get(obligation_id, [])
        needed = baseline.obligation_evidence_required.get(obligation_id, [])
        delta_type, reason, confidence = _classify(
            obligation_id in known_obligations,
            bool(on_file),
            change.change_type,
            rel_app,
            rel_unc,
        )

        # Evidence gaps are only reported for deltas that require action.
        if delta_type in _ACTION:
            gaps = [item for item in needed if item not in on_file]
        else:
            gaps = []

        delta = ObligationDelta(
            obligation_id=obligation_id,
            delta_type=delta_type,
            reason=reason,
            affected_evidence=list(on_file),
            missing_evidence=gaps,
            confidence=confidence,
        )
        deltas.append(delta)

    unsupported = [entry.domain for entry in reg_map.unsupported_domains]
    return ChangeAssessment(
        change_id=change.change_id,
        affects_product=rel_app or rel_unc,
        deltas=deltas,
        summary=_summary(deltas, unsupported),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _ids(deltas: List[ObligationDelta], *types: DeltaType) -> List[str]:
|
||||||
|
wanted = set(types)
|
||||||
|
return [d.obligation_id for d in deltas if d.delta_type in wanted]
|
||||||
|
|
||||||
|
|
||||||
|
def _summary(deltas: List[ObligationDelta], unsupported: List[str]) -> ChangeImpactSummary:
    """Condense the per-obligation deltas into the management-level summary.

    Args:
        deltas: classified deltas of one change.
        unsupported: domain names passed through unchanged as
            ``unsupported_domains``.

    Returns:
        A ``ChangeImpactSummary`` with a one-line (German) count sentence and
        the obligation ids grouped by category.
    """
    new_ids = _ids(deltas, DeltaType.NEW)
    changed_ids = _ids(deltas, DeltaType.CHANGED)
    removed_ids = _ids(deltas, DeltaType.REMOVED)
    covered_ids = _ids(deltas, DeltaType.ALREADY_COVERED)
    # CHANGED obligations are listed under "needs review" as well, so they are
    # counted both as changed and as to-be-reviewed.
    review_ids = _ids(deltas, DeltaType.NEEDS_REVIEW, DeltaType.CHANGED)
    irrelevant_ids = _ids(deltas, DeltaType.NOT_APPLICABLE)

    counts = (
        len(new_ids),
        len(changed_ids),
        len(removed_ids),
        len(covered_ids),
        len(review_ids),
        len(irrelevant_ids),
    )
    headline = "%d neu, %d geändert, %d entfällt, %d bereits abgedeckt, %d zu prüfen, %d nicht relevant." % counts

    return ChangeImpactSummary(
        what_changed=headline,
        what_matters_for_this_product=_ids(deltas, *_ACTION),
        already_covered=covered_ids,
        needs_review=review_ids,
        not_relevant=irrelevant_ids,
        unsupported_domains=unsupported,
    )
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""Regulatory Change Intelligence (RCI) — domain objects.
|
||||||
|
|
||||||
|
RCI is a read-/reasoning layer ON TOP of the product-first pipeline. It answers
|
||||||
|
"what changes relative to my existing Regulatory Map?" — NOT "what does the new
|
||||||
|
law say in general". A RegulatoryChange is simulated/provided INPUT (no ingestion,
|
||||||
|
no newsletter/mailbox, no RAG); the delta is computed against a stored
|
||||||
|
ComplianceBaseline (snapshot of the map).
|
||||||
|
|
||||||
|
`delta_type` is a THIRD vocabulary — distinct from `ClaimCoverage` (Welt 1, what
|
||||||
|
the customer claims) and `ComplianceStatus` (Welt 2, verified evidence). The three
|
||||||
|
must never be conflated. These are application/reasoning types, NOT
|
||||||
|
compliance-meta-model classes (architecture freeze v1.0 untouched).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from compliance.profile.canonical import CanonicalProductRegulatoryProfile
|
||||||
|
from compliance.reasoning.enums import AuthorityLevel, Confidence
|
||||||
|
from compliance.regulatory_map.schemas import RegulatoryMap
|
||||||
|
|
||||||
|
|
||||||
|
class DeltaType(str, Enum):
    """Outcome vocabulary for a single obligation when a regulatory change is assessed.

    This is a vocabulary of its own and must not be conflated with
    ``ClaimCoverage`` or ``ComplianceStatus``.
    """

    NEW = "new"                          # obligation now applies that was not in the baseline
    CHANGED = "changed"                  # existing obligation substantively modified
    REMOVED = "removed"                  # obligation no longer applies (repeal)
    ALREADY_COVERED = "already_covered"  # existing obligation, evidence likely suffices
    NEEDS_REVIEW = "needs_review"        # a human must check
    NOT_APPLICABLE = "not_applicable"    # change does not touch this product's map
|
||||||
|
|
||||||
|
|
||||||
|
class ChangeType(str, Enum):
    """Kind of regulatory change supplied as input to the assessment."""

    NEW_REGULATION = "new_regulation"    # a regulation is introduced
    AMENDMENT = "amendment"              # an existing regulation is modified
    REPEAL = "repeal"                    # a regulation/obligation is withdrawn
    GUIDANCE_UPDATE = "guidance_update"  # guidance on an existing regulation is updated
|
||||||
|
|
||||||
|
|
||||||
|
# ── stored snapshot ──────────────────────────────────────────────────────
|
||||||
|
class ComplianceBaseline(BaseModel):
    # Stored snapshot of a product's compliance state; regulatory changes are
    # assessed against this baseline.
    baseline_id: str
    product_profile_snapshot: CanonicalProductRegulatoryProfile
    regulatory_map_snapshot: RegulatoryMap
    applicable_obligations: List[str] = Field(default_factory=list)  # registry-linked obligation_ids
    # required evidence per obligation (derived) — used to compute missing_evidence
    obligation_evidence_required: Dict[str, List[str]] = Field(default_factory=dict)
    # evidence the customer ALREADY has, per obligation (provided)
    evidence_refs: Dict[str, List[str]] = Field(default_factory=dict)
    # NOTE(review): plain string, format not enforced here — presumably an ISO timestamp; confirm
    created_at: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ── simulated/provided change (INPUT — never ingested) ───────────────────
|
||||||
|
class RegulatoryChange(BaseModel):
    # A simulated or externally provided regulatory change; it is INPUT to the
    # delta computation and is never ingested from a feed.
    change_id: str
    source: str = "simulated"  # origin label; defaults to a simulated change
    affected_regulations: List[str] = Field(default_factory=list)  # regulation ids touched by the change
    affected_obligations: List[str] = Field(default_factory=list)  # obligation ids touched by the change
    change_type: ChangeType
    # NOTE(review): plain string, format not enforced here — presumably an ISO date; confirm
    effective_date: Optional[str] = None
    authority_level: AuthorityLevel = AuthorityLevel.LEGAL_TEXT  # defaults to legal text
    summary: str = ""  # free-text description of the change
|
||||||
|
|
||||||
|
|
||||||
|
# ── per-obligation delta ─────────────────────────────────────────────────
|
||||||
|
class ObligationDelta(BaseModel):
    # Result of assessing one obligation against a regulatory change.
    obligation_id: str
    delta_type: DeltaType
    reason: str  # human-readable explanation of the classification
    affected_evidence: List[str] = Field(default_factory=list)  # evidence already present for it
    missing_evidence: List[str] = Field(default_factory=list)   # required but not yet present
    confidence: Confidence
|
||||||
|
|
||||||
|
|
||||||
|
# ── management-level summary ──────────────────────────────────────────────
|
||||||
|
class ChangeImpactSummary(BaseModel):
    # Management-level digest of a change assessment; the list fields hold
    # obligation ids grouped by category.
    what_changed: str = ""  # one-line textual overview
    what_matters_for_this_product: List[str] = Field(default_factory=list)  # need action
    already_covered: List[str] = Field(default_factory=list)
    needs_review: List[str] = Field(default_factory=list)
    not_relevant: List[str] = Field(default_factory=list)
    unsupported_domains: List[str] = Field(default_factory=list)  # domain names, not obligation ids
|
||||||
|
|
||||||
|
|
||||||
|
class ChangeAssessment(BaseModel):
    # Complete result of assessing one regulatory change against a baseline.
    change_id: str
    affects_product: bool  # whether the change is relevant to this product at all
    deltas: List[ObligationDelta] = Field(default_factory=list)  # per-obligation outcomes
    summary: ChangeImpactSummary
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Regulatory Reasoning Engine.
|
||||||
|
|
||||||
|
A deterministic reasoning layer ON TOP of the Legal Knowledge Graph (obligation
|
||||||
|
registry) and the Compliance Execution Graph (control mapping / evidence). It
|
||||||
|
answers, for a concrete product: which regulations apply, which obligations
|
||||||
|
follow, whether the customer's implementation covers them, and whether a
|
||||||
|
customer interpretation is legally sound.
|
||||||
|
|
||||||
|
No new RAG, no new controls, no DB schema changes — scope & reasoning metamodel
|
||||||
|
only (spec §14).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from .claim_normalizer import normalize_claim
|
||||||
|
from .implementation_engine import reason_implementation_claim
|
||||||
|
from .interpretation_engine import assess_interpretation
|
||||||
|
from .obligation_engine import derive_obligations
|
||||||
|
from .scope_engine import discover_scope
|
||||||
|
|
||||||
|
# Public API of the package: the engine entry points imported from the
# submodules of this package.
__all__ = [
    "discover_scope",
    "derive_obligations",
    "normalize_claim",
    "reason_implementation_claim",
    "assess_interpretation",
]
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user