# ruff: noqa
# mypy: ignore-errors
"""Cross-Domain MCAP Convergence Analysis — where does the knowledge model converge? (Phase Ω, pause)

After Automotive the user paused on adding domains to ask a deeper question.
NOT "which MCAPs occur most often?" (frequency deceives — a generic `document_changes`
may be in 40 sources but is not the product core) but "which MCAPs CARRY the largest
part of the system?".

The answer is a deterministic MCAP IMPACT SCORE (no AI), an internal engineering tool,
computed by aggregating over the EXISTING data only.

    Impact(MCAP) = distinct Requirement Sources + distinct Target Types + distinct Domains
                 + distinct Journeys + Regulatory Leverage + Business Leverage

Four reports, all pure aggregation (no new runtime, no new architecture):
  1. Core       — highest impact (the cross-cutting nodes that carry the system)
  2. Emerging   — span >= 2 domains (bridges across requirement worlds)
  3. Isolated   — only one source/journey/domain (specialised, OR convergence not yet recognised)
  4. Suspicious — probably cut too coarse (generic) or too fine (one hyper-specific occurrence)

Non-runtime -> no deploy.

Run:
    cd backend-compliance && PYTHONPATH=. python3 reference_scenarios/mcap_convergence_analysis.py
"""
from __future__ import annotations

import os

import yaml

# Accumulated report lines; joined with newlines and printed once at the very end.
OUT = []


def w(s: str = "") -> None:
    """Append one line (default: an empty line) to the report buffer ``OUT``."""
    OUT.append(s)


def _load_yaml(path):
    """Parse the UTF-8 YAML file at *path* with ``yaml.safe_load`` and return the document.

    The file is opened in a ``with`` block so the handle is closed deterministically
    instead of being left to the garbage collector.
    """
    with open(path, encoding="utf-8") as fh:
        return yaml.safe_load(fh)


_HERE = os.path.dirname(__file__)
_TP = os.path.join(_HERE, "..", "knowledge", "transition_patterns")

# pattern file -> (domain, default target_type, default sources)
PATTERN_META = {
    "transition_pattern_iso27001_to_cra_maschinenvo_v1.yaml": ("industrial_automation", "regulation", ["CRA", "MaschinenVO"]),
    "transition_pattern_iso27001_to_cra_v1.yaml": ("industrial_automation", "regulation", ["CRA"]),
    "transition_pattern_iso9001_to_cra_v1.yaml": ("industrial_automation", "regulation", ["CRA"]),
    "transition_pattern_isms_to_tisax_v1.yaml": ("automotive", "certification", ["TISAX"]),
    "transition_pattern_iso14001_to_environmental_v1.yaml": (
        "environmental",
        "regulation",
        ["REACH", "RoHS", "Batterieverordnung", "Wasserrecht", "Abwasservorschriften", "Energiemanagement", "Kreislaufwirtschaft", "Emissionsschutz"],
    ),
}

# capability -> dict of sets we aggregate
idx = {}


def _ent(cap):
    """Return the aggregation entry for *cap*, creating an all-empty one on first use."""
    return idx.setdefault(
        cap,
        {"sources": set(), "types": set(), "domains": set(), "journeys": set(), "regs": set(), "markets": set()},
    )


def _add(cap, sources, stype, domain, journey):
    """Record that capability *cap* occurs for *sources* of type *stype* in *domain*/*journey*.

    All fields are sets, so repeated occurrences are de-duplicated. Sources of type
    "regulation" are additionally collected under "regs" and sources of type
    "contract" under "markets"; both feed the impact score as extra terms.
    """
    e = _ent(cap)
    e["sources"] |= set(sources)
    e["types"].add(stype)
    e["domains"].add(domain)
    e["journeys"].add(journey)
    if stype == "regulation":
        e["regs"] |= set(sources)
    if stype == "contract":
        e["markets"] |= set(sources)


# ── A) transition patterns: each pattern is a journey with a target/domain ───────────────────
# IMPORTANT (anti-frequency-deception): a `likely_covered` cap is PROVIDED BY the source cert (one
# certification source), NOT required by every target regulation — attributing all target sources to it
# would inflate management caps on raw frequency alone. Only `delta` caps name their real target sources.
for fname, (domain, ttype, default_sources) in PATTERN_META.items():
    p = _load_yaml(os.path.join(_TP, fname))
    journey = p.get("id", fname)
    # `or {}` / `or []` also cover keys that are present but null in the YAML, which a
    # plain .get(key, default) would return as None and then crash on.
    cert = ((p.get("transition_goal") or {}).get("from") or {}).get("standard", journey)
    for a in p.get("likely_covered") or []:
        _add(a["capability"], [cert], "certification", domain, journey)  # provided by the cert
    for d in p.get("delta_requirements") or []:
        srcs = d.get("covers_targets") or default_sources  # required by these target sources
        _add(d["capability"], srcs, ttype, domain, journey)

# ── B) automotive multi-source data: precise per-source attribution ──────────────────────────
A = _load_yaml(os.path.join(_HERE, "..", "knowledge", "domains", "automotive", "source_capabilities.yaml"))
for s in A["sources"]:
    for cap in s["requires"]:
        _add(cap, [s["id"]], s["type"], "automotive", "automotive_ecu")


# ── Impact score (deterministic) ─────────────────────────────────────────────────────────────
def impact(e):
    """Return the impact score of one aggregation entry: the sum of the sizes of its six sets."""
    return len(e["sources"]) + len(e["types"]) + len(e["domains"]) + len(e["journeys"]) + len(e["regs"]) + len(e["markets"])


# Highest impact first; ties are broken alphabetically by capability name so the order is stable.
scored = sorted(idx.items(), key=lambda kv: (-impact(kv[1]), kv[0]))

# Name prefixes treated as "generic verb" in the too-coarse heuristic of section 4.
GENERIC = ("document_", "manage_", "control_", "conduct_", "operate_", "run_", "assign_", "plan_", "approve_")

w("# Cross-Domain MCAP Convergence Analysis — wo konvergiert das Wissensmodell?")
w("")
# NOTE: the counts in the next sentence ("5 Transition Patterns + 7 Automotive-Quellen") are
# hard-coded text, not derived from the loaded data — keep them in sync by hand.
w('_Nicht „welche MCAPs kommen am häufigsten vor?" (Häufigkeit täuscht), sondern „welche MCAPs TRAGEN den größten Teil des Systems?". Deterministischer **Impact-Score** (kein ML), internes Engineering-Werkzeug, reine Aggregation über vorhandene Daten (5 Transition Patterns + 7 Automotive-Quellen). Non-runtime, keine echten Namen._')
w("")
w("## Impact-Score (deterministisch)")
w("> `Impact = distinct Sources + distinct Target-Types + distinct Domains + distinct Journeys + Regulatory Leverage + Business Leverage`")
w("- %d distinct Capabilities (MCAP-Kandidaten) über alle Quellen aggregiert." % len(idx))
w("")

# ── 1. Core MCAPs ─────────────────────────────────────────────────────────
w("## 1. Core MCAPs — höchster Impact (die tragenden Knoten)")
w("| Capability | Impact | Sources | Types | Domains | Journeys |")
w("|---|---:|---:|---:|---:|---:|")
for cap, e in scored[:10]:
    w("| `%s` | **%d** | %d | %d | %d | %d |" % (cap, impact(e), len(e["sources"]), len(e["types"]), len(e["domains"]), len(e["journeys"])))
w("")
w('→ Hoher Impact = ein Knoten verbindet viele Quellen ÜBER Typen/Domänen/Journeys hinweg — nicht „in 40 Dokumenten einer Normenfamilie".')
w("")

# ── 2. Emerging MCAPs (cross-domain bridges) ──────────────────────────────
emerging = [(c, e) for c, e in scored if len(e["domains"]) >= 2]
w("## 2. Emerging MCAPs — verbinden ≥2 Domänen (Brücken zwischen Anforderungswelten)")
for cap, e in emerging[:8]:
    w("- `%s` — %d Domänen (%s), %d Typen." % (cap, len(e["domains"]), ", ".join(sorted(e["domains"])), len(e["types"])))
w('- _(Echtes „Wachstum über Zeit" braucht historische Snapshots — hier Proxy = Domänen-Spannweite jetzt.)_')
w("")

# ── 3. Isolated MCAPs ─────────────────────────────────────────────────────
# Isolated = exactly one source AND exactly one journey.
isolated = [(c, e) for c, e in scored if len(e["sources"]) == 1 and len(e["journeys"]) == 1]
w("## 3. Isolated MCAPs — nur 1 Quelle/Journey (Review: spezialisiert ODER Konvergenz übersehen?)")
w("- %d Stück, u. a.: %s." % (len(isolated), ", ".join("`%s`" % c for c, _ in isolated[:8])))
w("")

# ── 4. Suspicious MCAPs (abstraction level) ───────────────────────────────
# Too coarse: generic-verb prefix, seen in >= 2 sources but at most one source type.
too_coarse = [(c, e) for c, e in scored if c.startswith(GENERIC) and len(e["types"]) <= 1 and len(e["sources"]) >= 2]
# Too fine: isolated and with a long name (>= 34 chars) as a proxy for a hyper-specific capability.
too_fine = [(c, e) for c, e in isolated if len(c) >= 34]
w("## 4. Suspicious MCAPs — Abstraktionsgrad-Verdacht (Experten-Review)")
w("- **Evtl. zu grob** (generisches Verb, breit aber nur 1 Typ): %s." % (", ".join("`%s`" % c for c, _ in too_coarse[:6]) or "—"))
w("- **Evtl. zu fein** (isoliert + sehr spezifischer Name): %s." % (", ".join("`%s`" % c for c, _ in too_fine[:6]) or "—"))
w("- Die Analyse sagt damit nicht nur WELCHE MCAPs wichtig sind, sondern auch, ob sie auf dem **richtigen Abstraktionsniveau** definiert sind.")
w("")

# ── Finding ("Befund") ─────────────────────────────────────────────────────
core_cut = [c for c, e in scored if impact(e) >= 8]
cross = [c for c, e in scored if len(e["domains"]) >= 2]
w("## Befund")
w("")
w('> **Ein Kern beginnt sich zu zeigen:** %d von %d Capabilities erreichen Impact ≥ 8 (tragende Knoten), %d verbinden ≥2 Domänen. Bislang ist das Wissensmodell noch jung (5 Patterns + 1 Automotive-Profil), aber die Methode steht: sobald Medical/Payment/weitere Domänen als DATEN hinzukommen, zeigt dieselbe Aggregation, ob sich der erwartete stabile Kern von 30–50 hochkonvergenten MCAPs bildet — der gemeinsame Strukturkern hinter sehr unterschiedlichen Anforderungswelten. Das ist ein tieferer Wertnachweis als „eine weitere Norm unterstützt". Reine Aggregation, 0 Runtime, 0 neue Architektur.' % (len(core_cut), len(idx), len(cross)))
w("")

print("\n".join(OUT))