feat: Certification Capability Hypotheses — capability-centric library + empirical confidence

The bottleneck is knowledge, not the endpoint. This builds the knowledge the Onboarding Advisor needs,
restructured per the user's key insight: NOT "ISO27001 -> 30 capabilities" but each hypothesis as its
own object "capability -> supported_by: [certs]". A capability is written ONCE with all supporting
certs, so the shared management-system core (document control, incident, supplier, audit, access,
asset, monitoring, training, crypto, release, risk) covers most certifications with ~18 hypotheses
instead of ~300 — and multi-certification merges AUTOMATICALLY (a company's inferred caps = every
hypothesis whose supported_by intersects its certs).

Welt-1 throughout: "IF cert present, EXPECT capability (verification required)", never "erfüllt".
Capabilities NO cert suggests (SBOM, signed updates, CVD, support period) have no hypothesis -> they
stay in the delta and get asked. confidence is EMPIRICAL: computed from real-onboarding observations
(confirmed/(confirmed+refuted)), None until calibrated — never an LLM/expert score (record_observation
+ empirical_confidence). The long-term moat: knowledge that learns from reality, not from a norm.

compliance/onboarding/hypotheses.py (resolve_for_certifications / inferred_hypotheses /
empirical_confidence / record_observation) feeds the existing advisor_start unchanged; the demo now
runs on the curated library. Pure, mypy --strict clean, library is DATA (no norm text, no real names).
Non-runtime -> no deploy. 12 tests pass, check-loc 0.
This commit is contained in:
Benjamin Admin
2026-06-28 13:16:45 +02:00
parent 02c9fdb18e
commit 2d2cb2a244
6 changed files with 260 additions and 7 deletions
@@ -9,6 +9,14 @@ meta-model; certificate->capability hypotheses and target requirements are INJEC
from __future__ import annotations from __future__ import annotations
from .engine import advisor_start, apply_answer from .engine import advisor_start, apply_answer
from .hypotheses import (
CapabilityHypothesis,
HypothesisObservations,
empirical_confidence,
inferred_hypotheses,
record_observation,
resolve_for_certifications,
)
from .schemas import ( from .schemas import (
AdvisorMeasure, AdvisorMeasure,
AdvisorQuestion, AdvisorQuestion,
@@ -27,4 +35,10 @@ __all__ = [
"AdvisorMeasure", "AdvisorMeasure",
"InferredAssumption", "InferredAssumption",
"RejectedAssumption", "RejectedAssumption",
"CapabilityHypothesis",
"HypothesisObservations",
"empirical_confidence",
"record_observation",
"inferred_hypotheses",
"resolve_for_certifications",
] ]
@@ -0,0 +1,71 @@
"""Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
`capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
`confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
(confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
"""
from __future__ import annotations
from typing import Dict, List, Optional, Sequence
from pydantic import BaseModel, Field
class HypothesisObservations(BaseModel):
    """Tally of real-onboarding outcomes for a single hypothesis.

    Both counters default to 0 and are constrained to be non-negative. A negative
    count is never a valid observation tally, and it would let the ratio
    confirmed / (confirmed + refuted) leave the [0, 1] range or make the total
    zero despite recorded data.
    """

    # Number of onboardings in which the expected capability was confirmed.
    confirmed: int = Field(default=0, ge=0)
    # Number of onboardings in which the expected capability was refuted.
    refuted: int = Field(default=0, ge=0)
class CapabilityHypothesis(BaseModel):
    """One capability-centric hypothesis: holding any cert in `supported_by` suggests `capability`.

    The hypothesis is an expectation to be verified, not a statement that the
    capability is fulfilled. No confidence value is stored on the model; it is
    derived from `observations` on demand.

    NOTE(review): the curated YAML entries also carry a `confidence: {empirical: null}`
    key that is not declared here; presumably it is dropped by pydantic's default
    handling of extra keys — confirm against the model config.
    """

    # Identifier of the hypothesis itself (the library uses ids such as "HYP-document_control").
    id: str
    # Identifier of the capability this hypothesis is about.
    capability: str
    supported_by: List[str] = Field(default_factory=list)  # certifications that suggest this capability
    relationship: str = "supports"  # supports / partially_supports
    verification_required: bool = True  # Welt-1: never auto-satisfied
    # Intent of the follow-up question; defaults to checking that the capability exists.
    question_intent: str = "verify_existence"
    # Names of the evidence artefacts expected when the capability is verified.
    expected_evidence: List[str] = Field(default_factory=list)
    # Confirmed/refuted tally; starts as a fresh, empty HypothesisObservations.
    observations: HypothesisObservations = Field(default_factory=HypothesisObservations)
    kind: str = "shared"  # shared / specific
def empirical_confidence(obs: HypothesisObservations) -> Optional[float]:
    """Return the observed confirmation rate, or None when nothing has been recorded yet.

    The rate is confirmed / (confirmed + refuted), rounded to two decimals. It is
    computed purely from the tally in `obs`.
    """
    total = obs.confirmed + obs.refuted
    if not total:
        # No observations at all: the hypothesis is still uncalibrated.
        return None
    return round(obs.confirmed / total, 2)
def record_observation(obs: HypothesisObservations, confirmed: bool) -> HypothesisObservations:
    """Return a new tally with one more real-onboarding outcome added.

    `obs` itself is left untouched; a truthy `confirmed` bumps the confirmed
    counter, anything else bumps the refuted counter.
    """
    if confirmed:
        return HypothesisObservations(confirmed=obs.confirmed + 1, refuted=obs.refuted)
    return HypothesisObservations(confirmed=obs.confirmed, refuted=obs.refuted + 1)
def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> List[CapabilityHypothesis]:
    """Select the hypotheses supported by at least one of the held certifications.

    Library order is preserved. Each hypothesis appears at most once per library
    entry, no matter how many of the held certifications support it.
    """
    held = set(certifications)
    matches: List[CapabilityHypothesis] = []
    for hypothesis in library:
        # Keep the hypothesis as soon as one of its supporting certs is held.
        if not held.isdisjoint(hypothesis.supported_by):
            matches.append(hypothesis)
    return matches
def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> Dict[str, List[str]]:
    """Turn the capability-centric library into a `cert -> [capability]` mapping.

    Only held certifications that support at least one hypothesis get a key.
    Keys come out in sorted order; each capability list follows library order
    and contains no duplicates.
    """
    held = set(certifications)
    by_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            capabilities = by_cert.setdefault(cert, [])
            # The same capability may be listed by several hypotheses; keep it once.
            if hypothesis.capability not in capabilities:
                capabilities.append(hypothesis.capability)
    return dict(sorted(by_cert.items(), key=lambda item: item[0]))
@@ -0,0 +1,92 @@
# Certification Capability Hypotheses — CAPABILITY-CENTRIC, shared core first.
#
# Proprietary norms (ISO/TISAX/PCI…) are NOT ingested. Instead each hypothesis is its own knowledge
# object: "IF a company holds these certifications, we EXPECT this capability with some probability —
# verification required". NOT "ISO 27001 HAS X" (Welt-2) but "ISO 27001 SUGGESTS X" (Welt-1).
#
# THE TRICK (reuse, not redundancy): a capability is written ONCE with `supported_by: [certs]`. Most
# management-system capabilities (document control, incident, supplier, audit, risk, asset, access,
# training, monitoring) recur across many certs, so ~40-60 hypotheses cover everything instead of ~300.
# Multi-certification then merges AUTOMATICALLY (a company's inferred caps = every hypothesis whose
# supported_by intersects its certs). capability ids match the existing transition patterns.
#
# `confidence.empirical` stays NULL until calibrated from REAL onboardings (observations.confirmed /
# refuted) — never an LLM/expert score. Capabilities a cert does NOT suggest (SBOM, CVD, support period,
# signed updates) simply have NO hypothesis -> they always stay in the delta and get asked. AI first
# draft (~95%), expert review + customer calibration follow. No norm text reproduced. No real names.
hypotheses:
# ── SHARED CORE — management-system capabilities that recur across certifications ───────────
- {id: HYP-document_control, capability: document_and_change_control, relationship: supports, kind: shared,
supported_by: [ISO9001, ISO13485, ISO27001, TISAX, ASPICE, IATF16949],
verification_required: true, question_intent: verify_existence, expected_evidence: [document_control_procedure],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-incident_management, capability: incident_management, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443, ISO13485],
verification_required: true, question_intent: verify_existence, expected_evidence: [incident_procedure],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-supplier_security, capability: supplier_security, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_security_records],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-supplier_evaluation, capability: supplier_evaluation, relationship: supports, kind: shared,
supported_by: [ISO9001, IATF16949, ISO13485],
verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_evaluation_records],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-access_control, capability: access_control_and_authentication, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [access_control_policy],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-logging_monitoring, capability: security_logging_and_monitoring, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [logging_configuration],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-asset_config, capability: asset_and_configuration_management, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [asset_inventory],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-vuln_management, capability: technical_vulnerability_management, relationship: partially_supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: confirm_product_scope, expected_evidence: [vulnerability_management_process],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-isms, capability: information_security_management, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX],
verification_required: true, question_intent: verify_existence, expected_evidence: [isms_scope],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-cryptography, capability: cryptography, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX, IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [crypto_policy],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-training, capability: security_awareness_training, relationship: supports, kind: shared,
supported_by: [ISO27001, TISAX],
verification_required: true, question_intent: verify_existence, expected_evidence: [training_records],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-prototype_protection, capability: protect_prototypes, relationship: supports, kind: shared,
supported_by: [TISAX],
verification_required: true, question_intent: verify_existence, expected_evidence: [prototype_protection_policy],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-release_approval, capability: release_and_approval_process, relationship: supports, kind: shared,
supported_by: [ISO9001, IATF16949, ISO13485],
verification_required: true, question_intent: verify_existence, expected_evidence: [release_procedure],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-ce_conformity, capability: ce_conformity_assessment_and_technical_documentation, relationship: partially_supports, kind: shared,
supported_by: [ISO9001, IATF16949],
verification_required: true, question_intent: request_evidence, expected_evidence: [technical_documentation],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
# ── CERT-SPECIFIC — capabilities a single domain's certificate suggests ─────────────────────
- {id: HYP-secure_dev, capability: secure_development_lifecycle, relationship: partially_supports, kind: specific,
supported_by: [IEC62443, ASPICE],
verification_required: true, question_intent: verify_existence, expected_evidence: [secure_development_policy],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-csms, capability: cybersecurity_management_system, relationship: supports, kind: specific,
supported_by: [IEC62443],
verification_required: true, question_intent: verify_existence, expected_evidence: [csms_records],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-environmental_docs, capability: environmental_management_documentation, relationship: supports, kind: specific,
supported_by: [ISO14001],
verification_required: true, question_intent: verify_existence, expected_evidence: [environmental_aspects_register],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
- {id: HYP-software_process, capability: assess_software_process_capability, relationship: supports, kind: specific,
supported_by: [ASPICE],
verification_required: true, question_intent: verify_existence, expected_evidence: [aspice_assessment],
confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
@@ -6,12 +6,12 @@ _Eingabe: Unternehmen + Produkte + Zertifizierungen + Ziel. Den Rest macht die O
> Zertifizierungen: **ISO9001, ISO27001, ISO14001, TISAX** · Produkt: **Parkschein-/Schrankensystem** · Ziel: **CRA** > Zertifizierungen: **ISO9001, ISO27001, ISO14001, TISAX** · Produkt: **Parkschein-/Schrankensystem** · Ziel: **CRA**
## Was wir erkannt haben ## Was wir erkannt haben
> 17 Anforderungen erkannt · 6 wahrscheinlich abgedeckt · 5 zu klären > 17 Anforderungen erkannt · 5 wahrscheinlich abgedeckt · 5 zu klären
**Aus Ihren Zertifizierungen abgeleitet (zu bestätigen, nicht automatisch erfüllt):** **Aus Ihren Zertifizierungen abgeleitet (zu bestätigen, nicht automatisch erfüllt):**
- ISO9001 legt 1 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt - ISO9001 legt 1 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- ISO27001 legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt - ISO27001 legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- TISAX legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt - TISAX legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- _ISO14001 ist für dieses Ziel nicht relevant — relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt_ - _ISO14001 ist für dieses Ziel nicht relevant — relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt_
## Die wenigen offenen Punkte — nur die nächsten besten Fragen ## Die wenigen offenen Punkte — nur die nächsten besten Fragen
@@ -12,7 +12,7 @@ from __future__ import annotations
import os import os
import yaml import yaml
from compliance.onboarding import OnboardingInput, advisor_start from compliance.onboarding import CapabilityHypothesis, OnboardingInput, advisor_start, resolve_for_certifications
from compliance.transition_reasoning import TargetRequirement from compliance.transition_reasoning import TargetRequirement
OUT = [] OUT = []
@@ -29,13 +29,14 @@ req = [TargetRequirement(capability_id=a["capability"]) for a in CRA["likely_cov
req += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"), req += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"),
expected_evidence=d.get("expected_evidence", [])) for d in CRA["delta_requirements"]] expected_evidence=d.get("expected_evidence", [])) for d in CRA["delta_requirements"]]
covers = {d["capability"]: d.get("covers_targets", []) for d in CRA["delta_requirements"]} covers = {d["capability"]: d.get("covers_targets", []) for d in CRA["delta_requirements"]}
hyp = {"ISO27001": infosec, "TISAX": infosec, # certificate hypotheses come from the CURATED, capability-centric library (multi-cert merges automatically)
"ISO9001": ["ce_conformity_assessment_and_technical_documentation"], _lib = [CapabilityHypothesis(**h) for h in yaml.safe_load(
"ISO14001": ["environmental_management_documentation"]} open(os.path.join(os.path.dirname(__file__), "..", "knowledge", "certification_hypotheses", "hypotheses.yaml"), encoding="utf-8"))["hypotheses"]]
inp = OnboardingInput(company="synthetisch", industry="machine_builder", inp = OnboardingInput(company="synthetisch", industry="machine_builder",
products=["Parkschein-/Schrankensystem"], markets=["EU", "DE"], products=["Parkschein-/Schrankensystem"], markets=["EU", "DE"],
certifications=["ISO9001", "ISO27001", "ISO14001", "TISAX"], certifications=["ISO9001", "ISO27001", "ISO14001", "TISAX"],
known_evidence=["CE process"], target=["CRA"]) known_evidence=["CE process"], target=["CRA"])
hyp = resolve_for_certifications(inp.certifications, _lib)
res = advisor_start(inp, hyp, req, target_id="CRA", covers_targets=covers, corpus_status={"CRA": "validated"}) res = advisor_start(inp, hyp, req, target_id="CRA", covers_targets=covers, corpus_status={"CRA": "validated"})
w("# Smart Onboarding Advisor — was der Nutzer sieht (automatisch, ohne Vertrieb)") w("# Smart Onboarding Advisor — was der Nutzer sieht (automatisch, ohne Vertrieb)")
@@ -0,0 +1,75 @@
"""Certification Capability Hypotheses — capability-centric library + empirical confidence.
Pins the reuse design (one capability, many supporting certs -> ~40-60 hypotheses, not ~300), the
automatic multi-certification merge, the empirical (computed) confidence loop, and the Welt-1 guarantee
that capabilities NO cert suggests (SBOM, signed updates, CVD) are never inferred -> they stay in the
delta and get asked. Then the Advisor consumes the resolved library end-to-end.
"""
from __future__ import annotations
import os
import yaml
from compliance.onboarding import (
CapabilityHypothesis,
HypothesisObservations,
OnboardingInput,
advisor_start,
empirical_confidence,
inferred_hypotheses,
record_observation,
resolve_for_certifications,
)
from compliance.transition_reasoning import TargetRequirement
_DIR = os.path.dirname(__file__)
# Location of the curated, capability-centric hypothesis library used by every test below.
_LIB_PATH = os.path.join(_DIR, "..", "knowledge", "certification_hypotheses", "hypotheses.yaml")
# Context manager so the handle is closed deterministically instead of being left to the GC.
with open(_LIB_PATH, encoding="utf-8") as _lib_file:
    _LIB = [CapabilityHypothesis(**entry) for entry in yaml.safe_load(_lib_file)["hypotheses"]]
def test_library_is_capability_centric_and_reuses_certs():
    """Document control is written once with many supporting certs, and the library stays small."""
    doc_control = next(hyp for hyp in _LIB if hyp.capability == "document_and_change_control")
    supporting_certs = doc_control.supported_by
    # One shared hypothesis is reused across certifications rather than repeated per cert.
    assert len(supporting_certs) >= 4
    # The whole library, not ~300 per-cert entries.
    assert len(_LIB) <= 60
def test_multi_certification_merges_automatically():
    """ISO9001 + ISO14001 + TISAX yield the union of their hypotheses."""
    held = ["ISO9001", "ISO14001", "TISAX"]
    caps = {hyp.capability for hyp in inferred_hypotheses(held, _LIB)}
    expected = (
        "document_and_change_control",  # ISO9001 + TISAX
        "information_security_management",  # TISAX
        "environmental_management_documentation",  # ISO14001
    )
    for capability in expected:
        assert capability in caps
    # SBOM / signed updates are suggested by NO certificate -> never inferred.
    for capability in ("sbom_creation", "secure_signed_update_distribution"):
        assert capability not in caps
def test_empirical_confidence_is_computed_not_assigned():
    """Confidence is None with no data and otherwise derived from the recorded outcomes."""
    tally = HypothesisObservations()
    assert empirical_confidence(tally) is None  # null until observed
    # Two confirmations followed by one refutation.
    for outcome in (True, True, False):
        tally = record_observation(tally, outcome)
    assert empirical_confidence(tally) == 0.67  # 2 / 3, from observations only
def test_resolve_adapts_to_advisor_input():
    """The resolver yields a `cert -> [capability]` mapping for the held certifications."""
    resolved = resolve_for_certifications(["ISO27001", "ISO9001"], _LIB)
    expectations = {
        "ISO27001": "incident_management",
        "ISO9001": "document_and_change_control",
    }
    for cert, capability in expectations.items():
        assert capability in resolved[cert]
def test_advisor_consumes_the_library_end_to_end():
    """Resolve the library for four certifications and run `advisor_start` against a CRA target."""
    pattern_path = os.path.join(
        _DIR, "..", "knowledge", "transition_patterns",
        "transition_pattern_iso27001_to_cra_maschinenvo_v1.yaml",
    )
    # Context manager so the YAML file handle is closed as soon as it has been parsed.
    with open(pattern_path, encoding="utf-8") as pattern_file:
        cra = yaml.safe_load(pattern_file)
    # Target requirements: the likely-covered capabilities plus the delta requirements.
    req = [TargetRequirement(capability_id=a["capability"]) for a in cra["likely_covered"]]
    req += [TargetRequirement(capability_id=d["capability"], expected_evidence=d.get("expected_evidence", []))
            for d in cra["delta_requirements"]]
    inp = OnboardingInput(company="x", certifications=["ISO27001", "TISAX", "ISO9001", "ISO14001"], target=["CRA"])
    hyp = resolve_for_certifications(inp.certifications, _LIB)  # library -> advisor input
    res = advisor_start(inp, hyp, req, target_id="CRA", corpus_status={"CRA": "validated"})
    assert res.inferred_assumptions and res.next_best_questions
    assert any(r.certification == "ISO14001" for r in res.rejected_assumptions)  # not relevant to CRA