Merge pull request 'Certification Capability Hypotheses — capability-centric + empirical confidence' (#42) from feat/certification-hypotheses into main

2026-06-28 13:17:20 +02:00
parent 02c9fdb18e 2d2cb2a244
commit 59b7006e5a
6 changed files with 260 additions and 7 deletions
@@ -9,6 +9,14 @@ meta-model; certificate->capability hypotheses and target requirements are INJEC
 from __future__ import annotations

 from .engine import advisor_start, apply_answer
+from .hypotheses import (
+    CapabilityHypothesis,
+    HypothesisObservations,
+    empirical_confidence,
+    inferred_hypotheses,
+    record_observation,
+    resolve_for_certifications,
+)
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
@@ -27,4 +35,10 @@ __all__ = [
    "AdvisorMeasure",
    "InferredAssumption",
    "RejectedAssumption",
+    "CapabilityHypothesis",
+    "HypothesisObservations",
+    "empirical_confidence",
+    "record_observation",
+    "inferred_hypotheses",
+    "resolve_for_certifications",
 ]
@@ -0,0 +1,71 @@
+"""Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
+
+Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
+`capability` (verification required)" — Welt-1 (the cert SUGGESTS it), never "erfüllt" (fulfilled). Each capability
+is written ONCE with a list of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
+
+Confidence is NOT an expert/LLM score and NOT a model field: `empirical_confidence` COMPUTES it from real-onboarding
+observations (confirmed / (confirmed+refuted), rounded to 2 decimals), `None` until any are seen. This is the empirical
+learning loop — the long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional, Sequence
+
+from pydantic import BaseModel, Field
+
+
+class HypothesisObservations(BaseModel):
+    confirmed: int = 0
+    refuted: int = 0
+
+
+class CapabilityHypothesis(BaseModel):
+    id: str
+    capability: str
+    supported_by: List[str] = Field(default_factory=list)        # certifications that suggest this capability
+    relationship: str = "supports"                               # supports / partially_supports
+    verification_required: bool = True                           # Welt-1: never auto-satisfied
+    question_intent: str = "verify_existence"
+    expected_evidence: List[str] = Field(default_factory=list)
+    observations: HypothesisObservations = Field(default_factory=HypothesisObservations)
+    kind: str = "shared"                                         # shared / specific
+
+
+def empirical_confidence(obs: HypothesisObservations) -> Optional[float]:
+    """Confidence from observations only: confirmed / (confirmed+refuted). None until any are recorded."""
+    n = obs.confirmed + obs.refuted
+    return round(obs.confirmed / n, 2) if n else None
+
+
+def record_observation(obs: HypothesisObservations, confirmed: bool) -> HypothesisObservations:
+    """One real-onboarding observation -> updated counts (the empirical calibration step)."""
+    return HypothesisObservations(
+        confirmed=obs.confirmed + (1 if confirmed else 0),
+        refuted=obs.refuted + (0 if confirmed else 1),
+    )
+
+
+def inferred_hypotheses(
+    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
+) -> List[CapabilityHypothesis]:
+    """Every hypothesis whose `supported_by` intersects the company's certs — the auto multi-cert merge."""
+    certs = set(certifications)
+    return [h for h in library if certs & set(h.supported_by)]
+
+
+def resolve_for_certifications(
+    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
+) -> Dict[str, List[str]]:
+    """Adapt the capability-centric library to the Advisor's `cert -> [capability]` input.
+
+    For each held certification, the capabilities its hypotheses suggest (deduped; certs sorted, capabilities in library order).
+    """
+    certs = set(certifications)
+    out: Dict[str, List[str]] = {}
+    for h in library:
+        for cert in h.supported_by:
+            if cert in certs and h.capability not in out.setdefault(cert, []):
+                out[cert].append(h.capability)
+    return {c: out[c] for c in sorted(out)}
@@ -0,0 +1,92 @@
+# Certification Capability Hypotheses — CAPABILITY-CENTRIC, shared core first.
+#
+# Proprietary norms (ISO/TISAX/PCI…) are NOT ingested. Instead each hypothesis is its own knowledge
+# object: "IF a company holds these certifications, we EXPECT this capability with some probability —
+# verification required". NOT "ISO 27001 HAS X" (Welt-2) but "ISO 27001 SUGGESTS X" (Welt-1).
+#
+# THE TRICK (reuse, not redundancy): a capability is written ONCE with `supported_by: [certs]`. Most
+# management-system capabilities (document control, incident, supplier, audit, risk, asset, access,
+# training, monitoring) recur across many certs, so ~40-60 hypotheses cover everything instead of ~300.
+# Multi-certification then merges AUTOMATICALLY (a company's inferred caps = every hypothesis whose
+# supported_by intersects its certs). capability ids match the existing transition patterns.
+#
+# `confidence.empirical` stays NULL until calibrated from REAL onboardings, as observations.confirmed /
+# (confirmed + refuted) — never an LLM/expert score. Capabilities NO cert suggests (SBOM, CVD, support
+# period, signed updates) simply have NO hypothesis -> they always stay in the delta and get asked. AI first
+# draft (~95%), expert review + customer calibration follow. No norm text reproduced. No real names.
+
+hypotheses:
+  # ── SHARED CORE — management-system capabilities that recur across certifications ───────────
+  - {id: HYP-document_control, capability: document_and_change_control, relationship: supports, kind: shared,
+     supported_by: [ISO9001, ISO13485, ISO27001, TISAX, ASPICE, IATF16949],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [document_control_procedure],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-incident_management, capability: incident_management, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443, ISO13485],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [incident_procedure],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-supplier_security, capability: supplier_security, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_security_records],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-supplier_evaluation, capability: supplier_evaluation, relationship: supports, kind: shared,
+     supported_by: [ISO9001, IATF16949, ISO13485],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_evaluation_records],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-access_control, capability: access_control_and_authentication, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [access_control_policy],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-logging_monitoring, capability: security_logging_and_monitoring, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [logging_configuration],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-asset_config, capability: asset_and_configuration_management, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [asset_inventory],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-vuln_management, capability: technical_vulnerability_management, relationship: partially_supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: confirm_product_scope, expected_evidence: [vulnerability_management_process],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-isms, capability: information_security_management, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [isms_scope],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-cryptography, capability: cryptography, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX, IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [crypto_policy],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-training, capability: security_awareness_training, relationship: supports, kind: shared,
+     supported_by: [ISO27001, TISAX],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [training_records],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-prototype_protection, capability: protect_prototypes, relationship: supports, kind: shared,
+     supported_by: [TISAX],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [prototype_protection_policy],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-release_approval, capability: release_and_approval_process, relationship: supports, kind: shared,
+     supported_by: [ISO9001, IATF16949, ISO13485],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [release_procedure],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-ce_conformity, capability: ce_conformity_assessment_and_technical_documentation, relationship: partially_supports, kind: shared,
+     supported_by: [ISO9001, IATF16949],
+     verification_required: true, question_intent: request_evidence, expected_evidence: [technical_documentation],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  # ── CERT-SPECIFIC — capabilities a single domain's certificate suggests ─────────────────────
+  - {id: HYP-secure_dev, capability: secure_development_lifecycle, relationship: partially_supports, kind: specific,
+     supported_by: [IEC62443, ASPICE],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [secure_development_policy],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-csms, capability: cybersecurity_management_system, relationship: supports, kind: specific,
+     supported_by: [IEC62443],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [csms_records],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-environmental_docs, capability: environmental_management_documentation, relationship: supports, kind: specific,
+     supported_by: [ISO14001],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [environmental_aspects_register],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
+  - {id: HYP-software_process, capability: assess_software_process_capability, relationship: supports, kind: specific,
+     supported_by: [ASPICE],
+     verification_required: true, question_intent: verify_existence, expected_evidence: [aspice_assessment],
+     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
@@ -6,12 +6,12 @@ _Eingabe: Unternehmen + Produkte + Zertifizierungen + Ziel. Den Rest macht die O
 > Zertifizierungen: **ISO9001, ISO27001, ISO14001, TISAX** · Produkt: **Parkschein-/Schrankensystem** · Ziel: **CRA**

 ## Was wir erkannt haben
-> 17 Anforderungen erkannt · 6 wahrscheinlich abgedeckt · 5 zu klären
+> 17 Anforderungen erkannt · 5 wahrscheinlich abgedeckt · 5 zu klären

 **Aus Ihren Zertifizierungen abgeleitet (zu bestätigen, nicht automatisch erfüllt):**
 - ISO9001 legt 1 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- ISO27001 legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- TISAX legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
+- ISO27001 legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
+- TISAX legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
 - _ISO14001 ist für dieses Ziel nicht relevant — relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt_

 ## Die wenigen offenen Punkte — nur die nächsten besten Fragen
@@ -12,7 +12,7 @@ from __future__ import annotations
 import os
 import yaml

-from compliance.onboarding import OnboardingInput, advisor_start
+from compliance.onboarding import CapabilityHypothesis, OnboardingInput, advisor_start, resolve_for_certifications
 from compliance.transition_reasoning import TargetRequirement

 OUT = []
@@ -29,13 +29,14 @@ req = [TargetRequirement(capability_id=a["capability"]) for a in CRA["likely_cov
 req += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"),
                          expected_evidence=d.get("expected_evidence", [])) for d in CRA["delta_requirements"]]
 covers = {d["capability"]: d.get("covers_targets", []) for d in CRA["delta_requirements"]}
-hyp = {"ISO27001": infosec, "TISAX": infosec,
-       "ISO9001": ["ce_conformity_assessment_and_technical_documentation"],
-       "ISO14001": ["environmental_management_documentation"]}
+# certificate hypotheses come from the CURATED, capability-centric library (multi-cert merges automatically)
+_lib = [CapabilityHypothesis(**h) for h in yaml.safe_load(
+    open(os.path.join(os.path.dirname(__file__), "..", "knowledge", "certification_hypotheses", "hypotheses.yaml"), encoding="utf-8"))["hypotheses"]]
 inp = OnboardingInput(company="synthetisch", industry="machine_builder",
                      products=["Parkschein-/Schrankensystem"], markets=["EU", "DE"],
                      certifications=["ISO9001", "ISO27001", "ISO14001", "TISAX"],
                      known_evidence=["CE process"], target=["CRA"])
+hyp = resolve_for_certifications(inp.certifications, _lib)
 res = advisor_start(inp, hyp, req, target_id="CRA", covers_targets=covers, corpus_status={"CRA": "validated"})

 w("# Smart Onboarding Advisor — was der Nutzer sieht (automatisch, ohne Vertrieb)")
@@ -0,0 +1,75 @@
+"""Certification Capability Hypotheses — capability-centric library + empirical confidence.
+
+Pins the reuse design (one capability, many supporting certs -> ~40-60 hypotheses, not ~300), the
+automatic multi-certification merge, the empirical (computed) confidence loop, and the Welt-1 guarantee
+that capabilities NO cert suggests (SBOM, signed updates, CVD) are never inferred -> they stay in the
+delta and get asked. Then the Advisor consumes the resolved library end-to-end.
+"""
+
+from __future__ import annotations
+
+import os
+
+import yaml
+
+from compliance.onboarding import (
+    CapabilityHypothesis,
+    HypothesisObservations,
+    OnboardingInput,
+    advisor_start,
+    empirical_confidence,
+    inferred_hypotheses,
+    record_observation,
+    resolve_for_certifications,
+)
+from compliance.transition_reasoning import TargetRequirement
+
+_DIR = os.path.dirname(__file__)
+_LIB = [CapabilityHypothesis(**h) for h in yaml.safe_load(
+    open(os.path.join(_DIR, "..", "knowledge", "certification_hypotheses", "hypotheses.yaml"), encoding="utf-8"))["hypotheses"]]
+
+
+def test_library_is_capability_centric_and_reuses_certs():
+    # the shared core is small (reuse, not 30-per-cert) and document control is supported by many certs
+    doc = next(h for h in _LIB if h.capability == "document_and_change_control")
+    assert len(doc.supported_by) >= 4
+    assert len(_LIB) <= 60  # whole library, not ~300
+
+
+def test_multi_certification_merges_automatically():
+    # a company with ISO9001 + ISO14001 + TISAX gets the UNION of their hypotheses, deduped
+    merged = inferred_hypotheses(["ISO9001", "ISO14001", "TISAX"], _LIB)
+    caps = {h.capability for h in merged}
+    assert "document_and_change_control" in caps          # ISO9001 + TISAX
+    assert "information_security_management" in caps        # TISAX
+    assert "environmental_management_documentation" in caps  # ISO14001
+    # SBOM / signed updates are suggested by NO certificate -> never inferred
+    assert "sbom_creation" not in caps and "secure_signed_update_distribution" not in caps
+
+
+def test_empirical_confidence_is_computed_not_assigned():
+    obs = HypothesisObservations()
+    assert empirical_confidence(obs) is None               # null until observed
+    obs = record_observation(obs, True)
+    obs = record_observation(obs, True)
+    obs = record_observation(obs, False)
+    assert empirical_confidence(obs) == 0.67               # 2 / 3, from observations only
+
+
+def test_resolve_adapts_to_advisor_input():
+    res = resolve_for_certifications(["ISO27001", "ISO9001"], _LIB)
+    assert "incident_management" in res["ISO27001"]
+    assert "document_and_change_control" in res["ISO9001"]
+
+
+def test_advisor_consumes_the_library_end_to_end():
+    cra = yaml.safe_load(open(os.path.join(_DIR, "..", "knowledge", "transition_patterns",
+                          "transition_pattern_iso27001_to_cra_maschinenvo_v1.yaml"), encoding="utf-8"))
+    req = [TargetRequirement(capability_id=a["capability"]) for a in cra["likely_covered"]]
+    req += [TargetRequirement(capability_id=d["capability"], expected_evidence=d.get("expected_evidence", []))
+            for d in cra["delta_requirements"]]
+    inp = OnboardingInput(company="x", certifications=["ISO27001", "TISAX", "ISO9001", "ISO14001"], target=["CRA"])
+    hyp = resolve_for_certifications(inp.certifications, _LIB)        # library -> advisor input
+    res = advisor_start(inp, hyp, req, target_id="CRA", corpus_status={"CRA": "validated"})
+    assert res.inferred_assumptions and res.next_best_questions
+    assert any(r.certification == "ISO14001" for r in res.rejected_assumptions)  # not relevant to CRA