diff --git a/backend-compliance/knowledge/vocabulary/journey_classes.yaml b/backend-compliance/knowledge/vocabulary/journey_classes.yaml new file mode 100644 index 00000000..557c5377 --- /dev/null +++ b/backend-compliance/knowledge/vocabulary/journey_classes.yaml @@ -0,0 +1,30 @@ +# Domain Vocabulary — Journey CLASSES (PROVISIONAL). A class clusters journey instances that are +# "the same journey (Reise)". So we do NOT write a new journey for every certification when many share a class. +# PROVISIONAL: Journey Class is a NEW abstraction -> its OWN Rule of Three (>= 3 instances per class +# before minting MJRN ids). Endpoints reference regulation vocabulary ids (see regulations.yaml). + +id: VOCAB-journey-classes-v1 +status: provisional +classes: + - id: infosec-to-product-cyber # provisional id, NOT a minted MJRN + name: "Information Security → Product Cybersecurity" + from_kind: information_security + to_kind: product_cybersecurity + instances: + - {from: iso27001, to: cra} # ✅ modelled (TP-ISO27001-CRA-v1) + - {from: tisax, to: cra} # ⏳ Rule-of-Three transition #3 + - {from: iec62443, to: cra} # ⏳ + + - id: qm-to-product-compliance + name: "Quality Management → Product Compliance/Safety" + from_kind: quality_management + to_kind: product_compliance_safety + instances: + - {from: iso9001, to: cra} # ✅ modelled (TP-ISO9001-CRA-v1) + - {from: iso9001, to: maschinenvo} # ⏳ Rule-of-Three transition #2 — INSTANCE of this class, not a new kind + - {from: iso13485, to: mdr} # same CLASS, different domain (medical) — proves the class generalises + +note: > + Befund: ISO9001→MaschinenVO ist KEINE neue Journey-Art, sondern eine INSTANZ der Klasse + „Quality Management → Product Compliance/Safety" (wie ISO9001→CRA, ISO13485→MDR). Das ist genau die + Duplikation, die das Vokabular verhindert. 
diff --git a/backend-compliance/knowledge/vocabulary/regulations.yaml b/backend-compliance/knowledge/vocabulary/regulations.yaml new file mode 100644 index 00000000..556651c6 --- /dev/null +++ b/backend-compliance/knowledge/vocabulary/regulations.yaml @@ -0,0 +1,21 @@ +# Domain Vocabulary — regulation/standard IDENTITIES (Requirement Sources + Targets). +# Each has a stable id + a canonical name + every alias/spelling. SOLVES the regulation-ID +# normalization that the Transition Coverage KPI + Knowledge Intake flagged (CRA vs "Cyber Resilience +# Act"). Reasoning seeds this; @Legal-KG / @Execution please adopt as the SHARED vocabulary. +# Not runtime, no minting — a shared knowledge artifact. + +id: VOCAB-regulations-v1 +regulations: + - {id: cra, canonical: "Cyber Resilience Act", aliases: [CRA, "Cyber Resilience Act", "Regulation (EU) 2024/2847"]} + - {id: maschinenvo, canonical: "Maschinenverordnung", aliases: [MaschinenVO, Maschinenverordnung, "Machinery Regulation", "Regulation (EU) 2023/1230"]} + - {id: iso9001, canonical: "ISO 9001", aliases: [ISO9001, "ISO 9001", "ISO/IEC 9001", QMS, "Quality Management System"]} + - {id: iso27001, canonical: "ISO/IEC 27001", aliases: [ISO27001, "ISO 27001", "ISO/IEC 27001", ISMS, "Information Security Management System"]} + - {id: tisax, canonical: "TISAX", aliases: [TISAX, "Trusted Information Security Assessment Exchange"]} + - {id: iec62443, canonical: "IEC 62443", aliases: [IEC62443, "IEC 62443", "ISO/IEC 62443"]} + - {id: nis2, canonical: "NIS2", aliases: [NIS2, "NIS 2", "Directive (EU) 2022/2555"]} + - {id: dataact, canonical: "Data Act", aliases: [DataAct, "Data Act", "Regulation (EU) 2023/2854"]} + - {id: iso13485, canonical: "ISO 13485", aliases: [ISO13485, "ISO 13485"]} + - {id: mdr, canonical: "MDR", aliases: [MDR, "Medical Device Regulation", "Regulation (EU) 2017/745"]} + - {id: iec62304, canonical: "IEC 62304", aliases: [IEC62304, "IEC 62304"]} + - {id: iso14001, canonical: "ISO 14001", aliases: [ISO14001, 
"ISO 14001"]} + - {id: iatf16949, canonical: "IATF 16949", aliases: [IATF16949, "IATF 16949", IATF]} diff --git a/backend-compliance/reference_scenarios/_helpers.py b/backend-compliance/reference_scenarios/_helpers.py index f135c7f8..f7847660 100644 --- a/backend-compliance/reference_scenarios/_helpers.py +++ b/backend-compliance/reference_scenarios/_helpers.py @@ -141,6 +141,25 @@ def completeness_section() -> None: ]) +def _regulation_aliases(base_dir): + """Build a normalized alias -> canonical-id map from the Domain Vocabulary (regulations.yaml).""" + import os + import yaml + path = os.path.join(base_dir, "..", "knowledge", "vocabulary", "regulations.yaml") + amap = {} + with open(path, encoding="utf-8") as h: + for r in (yaml.safe_load(h) or {}).get("regulations", []): + for name in [r["canonical"]] + list(r.get("aliases", [])): + amap["".join(c for c in str(name).lower() if c.isalnum())] = r["id"] + return amap + + +def _canon_reg(s, amap): + """Canonicalize a regulation string via the vocabulary (replaces the old hard-coded alias maps).""" + return amap.get("".join(c for c in str(s).lower() if c.isalnum()), + "".join(c for c in str(s).lower() if c.isalnum())) + + def domain_programs_section(base_dir) -> None: """Domain Knowledge Program v1 — per-domain maturity KPI DERIVED from the corpus (computed-not-stored).""" import os @@ -159,11 +178,10 @@ def domain_programs_section(base_dir) -> None: for f in sorted(os.listdir(pdir)) if f.endswith(".yaml")] progs = sorted((p for p in _all if "backlog_rank" in p), key=lambda p: p["backlog_rank"]) # domain programs only - _ALIAS = {"cyber resilience act": "cra", "maschinenverordnung": "maschinenvo", "iatf": "iatf16949"} + _amap = _regulation_aliases(base_dir) # Domain Vocabulary (regulations.yaml) def _canon(r): - k = str(r).strip().lower() - return _ALIAS.get(k, k) + return _canon_reg(r, _amap) def _hits(reg_lists, src): cs = {_canon(s) for s in src} @@ -199,7 +217,7 @@ def domain_programs_section(base_dir) -> None: 
coverage_table([ ("Domain Knowledge Program (7-Stufen-Produktionsstraße)", "PASS", "%d Domänen im Backlog, Industrial Automation #1" % len(progs)), ("Reifegrad-KPI (computed-not-stored)", "PASS", "aus echtem Korpus abgeleitet (TP/PB/RTS je Domäne)"), - ("Regelwerk-ID-Normalisierung", "TODO", "Alias CRA/MaschinenVO im KPI — kanonische IDs ausstehend"), + ("Regelwerk-ID-Normalisierung (Domain Vocabulary)", "PASS", "Aliase aus `vocabulary/regulations.yaml`, nicht mehr hartkodiert"), ]) @@ -224,12 +242,10 @@ def transition_coverage_section(base_dir) -> None: for it in (to if isinstance(to, list) else [to]) if isinstance(it, dict)] pats.append((frm, [t for t in tos if t], str(d.get("status", "draft")))) - _ALIAS = {"isoiec27001": "iso27001", "isoiec62443": "iec62443", - "cyberresilienceact": "cra", "maschinenverordnung": "maschinenvo"} + _amap = _regulation_aliases(base_dir) # Domain Vocabulary (regulations.yaml) def _c(s): - k = "".join(ch for ch in str(s).lower() if ch.isalnum()) - return _ALIAS.get(k, k) + return _canon_reg(s, _amap) _RANK = {"draft": 1, "reviewed": 2, "validated": 3, "proven": 4} _ICON = {0: "⚪ nicht begonnen", 1: "🟡 Draft", 2: "✅ reviewed", 3: "✅ validated", 4: "✅ Gold"} diff --git a/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md b/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md index b36883ad..8679da4c 100644 --- a/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md +++ b/backend-compliance/reference_scenarios/reference_scenario_suite_v1.md @@ -385,7 +385,7 @@ _Industry-Einstieg + ETO-Hypothese: jede Domäne kennt ihre typischen Sources + |---|---|---| | Domain Knowledge Program (7-Stufen-Produktionsstraße) | **PASS** | 5 Domänen im Backlog, Industrial Automation #1 | | Reifegrad-KPI (computed-not-stored) | **PASS** | aus echtem Korpus abgeleitet (TP/PB/RTS je Domäne) | -| Regelwerk-ID-Normalisierung | **TODO** | Alias CRA/MaschinenVO im KPI — kanonische IDs ausstehend | +| 
Regelwerk-ID-Normalisierung (Domain Vocabulary) | **PASS** | Aliase aus `vocabulary/regulations.yaml`, nicht mehr hartkodiert | ## Transition Coverage — die Transition ist die Wissenseinheit (Operational Knowledge) @@ -424,5 +424,5 @@ _Der Kunde kauft nicht „EMV-Domain", sondern „wir haben ISO 9001 — helfen ## Suite-Status (Roll-up) - Coverage-Zellen gesamt: **50** -- PASS: **38** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 7 · N/A: 1 · NEEDS_FACTS: 0 +- PASS: **39** · PARTIAL: 3 · UNSUPPORTED: 1 · TODO: 6 · N/A: 1 · NEEDS_FACTS: 0 - Fortschritt = PASS-Anteil steigt, wenn Epics RS-001…004 landen (objektiver Maßstab, kein LOC). diff --git a/backend-compliance/tests/test_vocabulary.py b/backend-compliance/tests/test_vocabulary.py new file mode 100644 index 00000000..35e031c4 --- /dev/null +++ b/backend-compliance/tests/test_vocabulary.py @@ -0,0 +1,86 @@ +"""Characterization tests for the Domain Vocabulary (data, not code). + +Pins the IDENTITY-vs-REPRESENTATION contract: regulations have a stable id + canonical name + aliases +(so CRA and "Cyber Resilience Act" resolve to the SAME identity — the normalization that the KPIs +flagged). Journey classes cluster transition instances so we do not duplicate the same reise; they +are PROVISIONAL (no MJRN minting) and reference regulation ids that exist in the vocabulary. 
+""" + +from __future__ import annotations + +import os + +import yaml + +_VOCAB = os.path.join(os.path.dirname(__file__), "..", "knowledge", "vocabulary") + + +def _regs(): + with open(os.path.join(_VOCAB, "regulations.yaml"), encoding="utf-8") as h: + return yaml.safe_load(h)["regulations"] + + +def _classes(): + with open(os.path.join(_VOCAB, "journey_classes.yaml"), encoding="utf-8") as h: + return yaml.safe_load(h) + + +def _norm(s): + return "".join(c for c in str(s).lower() if c.isalnum()) + + +def _alias_map(): + amap = {} + for r in _regs(): + for name in [r["canonical"]] + list(r.get("aliases", [])): + amap[_norm(name)] = r["id"] + return amap + + +def test_every_regulation_has_id_canonical_aliases(): + for r in _regs(): + assert r["id"] and r["canonical"] and r["aliases"] + assert r["id"] == r["id"].lower() # ids are lowercase stable keys + + +def test_cra_spellings_resolve_to_one_identity(): + amap = _alias_map() + # the exact normalization the KPIs needed: CRA == Cyber Resilience Act + assert amap[_norm("CRA")] == "cra" and amap[_norm("Cyber Resilience Act")] == "cra" + assert amap[_norm("Regulation (EU) 2024/2847")] == "cra" + + +def test_iso_and_management_system_aliases_resolve(): + amap = _alias_map() + assert amap[_norm("ISO9001")] == "iso9001" and amap[_norm("QMS")] == "iso9001" + assert amap[_norm("ISO/IEC 27001")] == "iso27001" and amap[_norm("ISMS")] == "iso27001" + assert amap[_norm("Maschinenverordnung")] == "maschinenvo" and amap[_norm("MaschinenVO")] == "maschinenvo" + + +def test_aliases_are_unambiguous(): + # no normalized alias maps to two different regulation identities + seen = {} + for r in _regs(): + for name in [r["canonical"]] + list(r.get("aliases", [])): + k = _norm(name) + assert seen.get(k, r["id"]) == r["id"], "ambiguous alias %r" % name + seen[k] = r["id"] + + +def test_journey_classes_are_provisional(): + assert _classes()["status"] == "provisional" # new abstraction -> own Rule of Three + + +def 
test_iso9001_maschinenvo_is_an_instance_not_a_new_kind(): + classes = _classes()["classes"] + qm = [c for c in classes if c["id"] == "qm-to-product-compliance"][0] + pairs = {(i["from"], i["to"]) for i in qm["instances"]} + assert ("iso9001", "maschinenvo") in pairs # same CLASS as iso9001->cra, iso13485->mdr + assert ("iso13485", "mdr") in pairs # class generalises across domains + + +def test_class_endpoints_reference_known_regulations(): + reg_ids = {r["id"] for r in _regs()} + for c in _classes()["classes"]: + for inst in c["instances"]: + assert inst["from"] in reg_ids and inst["to"] in reg_ids # vocabulary is internally consistent diff --git a/docs-src/architecture/domain-vocabulary-spec-v1.md b/docs-src/architecture/domain-vocabulary-spec-v1.md new file mode 100644 index 00000000..d89c14b1 --- /dev/null +++ b/docs-src/architecture/domain-vocabulary-spec-v1.md @@ -0,0 +1,76 @@ +# Domain Vocabulary — specification (PROPOSAL v1) + +- **Status:** PROPOSAL / draft. Beantwortet EINE Frage, bevor die nächste Journey entsteht. KEIN + Runtime-Modul, KEIN Parser, KEINE neue Architektur. +- **Datum:** 2026-06-28 +- **Bezug:** [[master-capability-registry-2c]], [journey-model-spec-v1.md](journey-model-spec-v1.md), [ADR-010](adr/ADR-010-operational-knowledge-transition-unit.md), [[strategy-requirements-intelligence]] + +## 1. Problem: es fehlt die SPRACHE + +Wir haben fünf Wissensobjekte (Requirement · Capability · Journey · Playbook · Reference Scenario), +aber kein **Vokabular**. Heute heißt eine Transition `ISO9001 → MaschinenVO`. Dieselbe Reise könnte +auch `Quality Management → Product Safety`, `QMS → Machinery Compliance` oder `Operational Excellence +→ CE` heißen. Bei 40 Requirement Sources / 300 Capabilities / 150 Journeys / 500 Playbooks wird die +**Benennung selbst zum Problem** — und ohne Vokabular modellieren wir dieselbe fachliche Sache mehrfach +unter verschiedenen Namen (genau die Duplikation, die wir bei Controls/Capabilities vermieden haben). + +## 2. 
Die EINE Frage + +> **Welche Begriffe unseres Systems sind IDENTITÄTEN, und welche sind nur DARSTELLUNGEN derselben +> fachlichen Bedeutung?** + +## 3. Antwort: Identitäten vs. Darstellungen + +| Ebene | Identität (stabile ID) | Owner | Darstellungen (canonical name + aliases) | +|---|---|---|---| +| Requirement | `RQ-xxxxx` | Legal/Execution | „SBOM-Pflicht" / Art.-Refs | +| Capability | `MCAP-xxxxx` (Registry 2C) | **Execution** | „Patch Management" + [Software Updates, Security Updates, Update Management, Patch Process, Vulnerability Remediation, Security Patch Procedure] | +| **Requirement Source / Target** (Regelwerk/Norm) | `reg:cra`, `reg:iso9001` … | **shared (Legal/Execution), Reasoning seedet** | „Cyber Resilience Act" + [CRA, Reg. (EU) 2024/2847]; „ISO 9001" + [QMS, Quality Management System] | +| **Journey Class** | `MJRN-xxxxx` (**PROVISIONAL**) | **Reasoning** | „Quality Management → Product Compliance" | +| Journey (Instanz) | `(source-id → target-id)` | Reasoning | `iso9001 → maschinenvo` | +| Playbook | `MPLB-xxxxx` | Reasoning | „SBOM aufbauen" | + +**Identität = die Sache; Darstellung = jeder Name dafür.** Eine Journey-Instanz ist stabil, weil ihre +Endpunkte (Source/Target) IDENTITÄTEN sind, nicht Strings — egal ob man „ISO9001", „QMS" oder +„Operational Excellence" schreibt. + +## 4. Capability-Vokabular = Capability Registry (2C), NICHT neu bauen + +Canonical Name + Aliases einer Capability sind bereits ein **Registry-2C-Konzept** (Execution): die +Registry hat stabile `MCAP`-IDs + Relationstypen `equivalent`/`related` (= Synonyme) + Provenance. +Das Domain Vocabulary DUPLIZIERT das nicht — es macht es nur explizit und ergänzt zwei NEUE Ebenen, +die Reasoning besitzt: **Regelwerk-Identitäten** und **Journey Class**. + +## 5. Sofort-Nutzen: Regelwerk-Normalisierung (löst einen offenen TODO) + +`reg:cra` mit canonical „Cyber Resilience Act" + aliases `[CRA, Cyber Resilience Act, Reg. 
(EU) +2024/2847]` löst genau die **Regelwerk-ID-Normalisierung**, die Transition-Coverage-KPI + Knowledge +Intake bisher als TODO führen (CRA vs „Cyber Resilience Act"). `knowledge/vocabulary/regulations.yaml` +wird die SHARED Quelle; die Reference-Suite-KPIs lesen Aliase daraus statt aus hartkodierten Maps. + +## 6. Journey Class (PROVISIONAL — eigene Rule of Three) + +Eine Journey CLASS clustert Instanzen, die „dieselbe Reise" sind. `knowledge/vocabulary/journey_classes.yaml` +clustert unsere realen Transitionen — z. B. **`Information Security → Product Cybersecurity`** +(ISO27001→CRA, TISAX→CRA, IEC62443→CRA) und **`Quality Management → Product Compliance/Safety`** +(ISO9001→CRA, ISO9001→MaschinenVO, später ISO13485→MDR). So schreibt man NICHT für jede Zertifizierung +eine neue Journey. **Journey Class ist eine NEUE Abstraktion → provisional (kein MJRN-Mint), bis sie +sich selbst an ≥3 Instanzen je Klasse bewährt** ([[rule-of-three-canonicalization]]). + +## 7. Nebeneffekt: Requirements Intelligence (Vision V2) + +Wenn später ein Tender „Security Patch Procedure" fordert, erkennt BreakPilot den Alias von `MCAP-0017`, +ohne dass irgendwo „Patch Management" steht. Stabile Begriffe → konsistente Parser, Tender-Vergleiche, +Playbooks, Knowledge Intake. Das ist die Grundlage der Requirements Verification Platform +([[strategy-requirements-intelligence]]). + +## 8. Reihenfolge (User 2026-06-28) + +`Vocabulary` → `Transition #2` → `Transition #3` → Rule of Three → Journey kanonisch. Die nächsten zwei +Journeys zeigen, OB das Journey-MODELL stabil ist; das Vokabular zeigt, OB wir dieselbe fachliche Sache +immer GLEICH benennen — langfristig mindestens genauso wichtig. + +## 9. Was das NICHT ist + +- Kein Runtime/Parser/Engine, kein MCAP/MJRN-Minting (Freeze unberührt). Seed-Daten + Spec. +- Non-runtime → kein Deploy (ADR-001).