feat: Certification Capability Hypotheses — capability-centric library + empirical confidence

The bottleneck is knowledge, not the endpoint. This builds the knowledge the Onboarding Advisor needs, restructured per the user's key insight: NOT "ISO27001 -> 30 capabilities" but each hypothesis as its own object "capability -> supported_by: [certs]". A capability is written ONCE with all supporting certs, so the shared management-system core (document control, incident, supplier, audit, access, asset, monitoring, training, crypto, release, risk) covers most certifications with ~18 hypotheses instead of ~300 — and multi-certification merges AUTOMATICALLY (a company's inferred caps = every hypothesis whose supported_by intersects its certs). Welt-1 throughout: "IF cert present, EXPECT capability (verification required)", never "erfüllt". Capabilities NO cert suggests (SBOM, signed updates, CVD, support period) have no hypothesis -> they stay in the delta and get asked. confidence is EMPIRICAL: computed from real-onboarding observations (confirmed/(confirmed+refuted)), None until calibrated — never an LLM/expert score (record_observation + empirical_confidence). The long-term moat: knowledge that learns from reality, not from a norm. compliance/onboarding/hypotheses.py (resolve_for_certifications / inferred_hypotheses / empirical_confidence / record_observation) feeds the existing advisor_start unchanged; the demo now runs on the curated library. Pure, mypy --strict clean, library is DATA (no norm text, no real names). Non-runtime -> no deploy. 12 tests pass, check-loc 0.
2026-06-28 13:16:45 +02:00
parent 02c9fdb18e
commit 2d2cb2a244
6 changed files with 260 additions and 7 deletions
@@ -9,6 +9,14 @@ meta-model; certificate->capability hypotheses and target requirements are INJEC
 from __future__ import annotations
 from .engine import advisor_start, apply_answer
 from .hypotheses import (
    CapabilityHypothesis,
    HypothesisObservations,
    empirical_confidence,
    inferred_hypotheses,
    record_observation,
    resolve_for_certifications,
 )
 from .schemas import (
    AdvisorMeasure,
    AdvisorQuestion,
@@ -27,4 +35,10 @@ __all__ = [
    "AdvisorMeasure",
    "InferredAssumption",
    "RejectedAssumption",
    "CapabilityHypothesis",
    "HypothesisObservations",
    "empirical_confidence",
    "record_observation",
    "inferred_hypotheses",
    "resolve_for_certifications",
 ]
@@ -0,0 +1,71 @@
 """Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
 Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
 `capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
 of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
 `confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
 (confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
 long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
 """
 from __future__ import annotations
 from typing import Dict, List, Optional, Sequence
 from pydantic import BaseModel, Field
 class HypothesisObservations(BaseModel):
    """Running tally of real-onboarding outcomes recorded for one hypothesis.

    Both counters are validated as non-negative: a negative tally has no meaning and would let
    `confirmed / (confirmed + refuted)` produce a ratio outside [0, 1].
    """
    # number of observations in which the expected capability was confirmed
    confirmed: int = Field(default=0, ge=0)
    # number of observations in which the expected capability was refuted
    refuted: int = Field(default=0, ge=0)
 class CapabilityHypothesis(BaseModel):
    """One capability-centric knowledge object.

    Reads as: "IF a company holds one of `supported_by`, we EXPECT `capability`" — an expectation
    that still has to be verified, never an automatic pass. Only `id` and `capability` are required;
    every other field has a default.
    """
    id: str
    capability: str
    # certifications that suggest this capability
    supported_by: List[str] = Field(default_factory=list)
    # supports / partially_supports
    relationship: str = Field(default="supports")
    # Welt-1: never auto-satisfied
    verification_required: bool = Field(default=True)
    question_intent: str = Field(default="verify_existence")
    expected_evidence: List[str] = Field(default_factory=list)
    # confirmed/refuted tally; starts empty
    observations: HypothesisObservations = Field(default_factory=HypothesisObservations)
    # shared / specific
    kind: str = Field(default="shared")
 def empirical_confidence(obs: HypothesisObservations) -> Optional[float]:
    """Return the observed confirmation rate, or None while nothing has been observed.

    The rate is confirmed / (confirmed + refuted), rounded to two decimals. It is derived from the
    counters alone; no other input feeds into it.
    """
    total = obs.confirmed + obs.refuted
    if not total:
        # no observations yet -> no confidence value at all (not 0.0)
        return None
    return round(obs.confirmed / total, 2)
 def record_observation(obs: HypothesisObservations, confirmed: bool) -> HypothesisObservations:
    """Fold one real-onboarding outcome into the tally and return the result as a NEW object.

    The passed-in `obs` is left untouched; exactly one of the two counters is one higher in the
    returned tally, depending on the truthiness of `confirmed`.
    """
    hits, misses = obs.confirmed, obs.refuted
    if confirmed:
        hits += 1
    else:
        misses += 1
    return HypothesisObservations(confirmed=hits, refuted=misses)
 def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> List[CapabilityHypothesis]:
    """Select the hypotheses that at least one held certification supports.

    A hypothesis is kept when its `supported_by` shares any entry with `certifications`. Each
    hypothesis appears at most once, in library order, however many held certifications support it.
    """
    held = set(certifications)
    matches: List[CapabilityHypothesis] = []
    for hypothesis in library:
        if not held.isdisjoint(hypothesis.supported_by):
            matches.append(hypothesis)
    return matches
 def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
 ) -> Dict[str, List[str]]:
    """Flip the capability-centric library into a `cert -> [capability]` mapping.

    Only certifications that are both held and named by some hypothesis get a key. Keys come out in
    sorted order; each capability list keeps library order and contains no duplicates.
    """
    held = set(certifications)
    by_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            capabilities = by_cert.setdefault(cert, [])
            if hypothesis.capability not in capabilities:
                capabilities.append(hypothesis.capability)
    # rebuild with sorted keys so the output order does not depend on library order
    return {cert: by_cert[cert] for cert in sorted(by_cert)}
@@ -0,0 +1,92 @@
 # Certification Capability Hypotheses — CAPABILITY-CENTRIC, shared core first.
 #
 # Proprietary norms (ISO/TISAX/PCI…) are NOT ingested. Instead each hypothesis is its own knowledge
 # object: "IF a company holds these certifications, we EXPECT this capability with some probability —
 # verification required". NOT "ISO 27001 HAS X" (Welt-2) but "ISO 27001 SUGGESTS X" (Welt-1).
 #
 # THE TRICK (reuse, not redundancy): a capability is written ONCE with `supported_by: [certs]`. Most
 # management-system capabilities (document control, incident, supplier, audit, risk, asset, access,
 # training, monitoring) recur across many certs, so ~40-60 hypotheses cover everything instead of ~300.
 # Multi-certification then merges AUTOMATICALLY (a company's inferred caps = every hypothesis whose
 # supported_by intersects its certs). capability ids match the existing transition patterns.
 #
 # `confidence.empirical` stays NULL until calibrated from REAL onboardings (observations.confirmed /
 # refuted) — never an LLM/expert score. Capabilities a cert does NOT suggest (SBOM, CVD, support period,
 # signed updates) simply have NO hypothesis -> they always stay in the delta and get asked. AI first
 # draft (~95%), expert review + customer calibration follow. No norm text reproduced. No real names.
 hypotheses:
  # ── SHARED CORE — management-system capabilities that recur across certifications ───────────
  - {id: HYP-document_control, capability: document_and_change_control, relationship: supports, kind: shared,
     supported_by: [ISO9001, ISO13485, ISO27001, TISAX, ASPICE, IATF16949],
     verification_required: true, question_intent: verify_existence, expected_evidence: [document_control_procedure],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-incident_management, capability: incident_management, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443, ISO13485],
     verification_required: true, question_intent: verify_existence, expected_evidence: [incident_procedure],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-supplier_security, capability: supplier_security, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_security_records],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-supplier_evaluation, capability: supplier_evaluation, relationship: supports, kind: shared,
     supported_by: [ISO9001, IATF16949, ISO13485],
     verification_required: true, question_intent: verify_existence, expected_evidence: [supplier_evaluation_records],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-access_control, capability: access_control_and_authentication, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [access_control_policy],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-logging_monitoring, capability: security_logging_and_monitoring, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [logging_configuration],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-asset_config, capability: asset_and_configuration_management, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [asset_inventory],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-vuln_management, capability: technical_vulnerability_management, relationship: partially_supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: confirm_product_scope, expected_evidence: [vulnerability_management_process],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-isms, capability: information_security_management, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX],
     verification_required: true, question_intent: verify_existence, expected_evidence: [isms_scope],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-cryptography, capability: cryptography, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX, IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [crypto_policy],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-training, capability: security_awareness_training, relationship: supports, kind: shared,
     supported_by: [ISO27001, TISAX],
     verification_required: true, question_intent: verify_existence, expected_evidence: [training_records],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-prototype_protection, capability: protect_prototypes, relationship: supports, kind: shared,
     supported_by: [TISAX],
     verification_required: true, question_intent: verify_existence, expected_evidence: [prototype_protection_policy],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-release_approval, capability: release_and_approval_process, relationship: supports, kind: shared,
     supported_by: [ISO9001, IATF16949, ISO13485],
     verification_required: true, question_intent: verify_existence, expected_evidence: [release_procedure],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-ce_conformity, capability: ce_conformity_assessment_and_technical_documentation, relationship: partially_supports, kind: shared,
     supported_by: [ISO9001, IATF16949],
     verification_required: true, question_intent: request_evidence, expected_evidence: [technical_documentation],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  # ── CERT-SPECIFIC — capabilities a single domain's certificate suggests ─────────────────────
  - {id: HYP-secure_dev, capability: secure_development_lifecycle, relationship: partially_supports, kind: specific,
     supported_by: [IEC62443, ASPICE],
     verification_required: true, question_intent: verify_existence, expected_evidence: [secure_development_policy],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-csms, capability: cybersecurity_management_system, relationship: supports, kind: specific,
     supported_by: [IEC62443],
     verification_required: true, question_intent: verify_existence, expected_evidence: [csms_records],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-environmental_docs, capability: environmental_management_documentation, relationship: supports, kind: specific,
     supported_by: [ISO14001],
     verification_required: true, question_intent: verify_existence, expected_evidence: [environmental_aspects_register],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
  - {id: HYP-software_process, capability: assess_software_process_capability, relationship: supports, kind: specific,
     supported_by: [ASPICE],
     verification_required: true, question_intent: verify_existence, expected_evidence: [aspice_assessment],
     confidence: {empirical: null}, observations: {confirmed: 0, refuted: 0}}
@@ -6,12 +6,12 @@ _Eingabe: Unternehmen + Produkte + Zertifizierungen + Ziel. Den Rest macht die O
 > Zertifizierungen: **ISO9001, ISO27001, ISO14001, TISAX** · Produkt: **Parkschein-/Schrankensystem** · Ziel: **CRA**
 ## Was wir erkannt haben
-> 17 Anforderungen erkannt · 6 wahrscheinlich abgedeckt · 5 zu klären
+> 17 Anforderungen erkannt · 5 wahrscheinlich abgedeckt · 5 zu klären
 **Aus Ihren Zertifizierungen abgeleitet (zu bestätigen, nicht automatisch erfüllt):**
 - ISO9001 legt 1 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- ISO27001 legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
+- ISO27001 legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
- TISAX legt 5 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
+- TISAX legt 4 relevante Fähigkeit(en) nahe — Verifikation erforderlich, nicht automatisch erfüllt
 - _ISO14001 ist für dieses Ziel nicht relevant — relevance(evidence, target) = 0 — keine geforderte Fähigkeit abgedeckt_
 ## Die wenigen offenen Punkte — nur die nächsten besten Fragen
@@ -12,7 +12,7 @@ from __future__ import annotations
 import os
 import yaml
-from compliance.onboarding import OnboardingInput, advisor_start
+from compliance.onboarding import CapabilityHypothesis, OnboardingInput, advisor_start, resolve_for_certifications
 from compliance.transition_reasoning import TargetRequirement
 OUT = []
@@ -29,13 +29,14 @@ req = [TargetRequirement(capability_id=a["capability"]) for a in CRA["likely_cov
 req += [TargetRequirement(capability_id=d["capability"], question_intent=d.get("needed_information", "verify_existence"),
                          expected_evidence=d.get("expected_evidence", [])) for d in CRA["delta_requirements"]]
 covers = {d["capability"]: d.get("covers_targets", []) for d in CRA["delta_requirements"]}
-hyp = {"ISO27001": infosec, "TISAX": infosec,
+# certificate hypotheses come from the CURATED, capability-centric library (multi-cert merges automatically)
-       "ISO9001": ["ce_conformity_assessment_and_technical_documentation"],
+_lib = [CapabilityHypothesis(**h) for h in yaml.safe_load(
-       "ISO14001": ["environmental_management_documentation"]}
+    open(os.path.join(os.path.dirname(__file__), "..", "knowledge", "certification_hypotheses", "hypotheses.yaml"), encoding="utf-8"))["hypotheses"]]
 inp = OnboardingInput(company="synthetisch", industry="machine_builder",
                      products=["Parkschein-/Schrankensystem"], markets=["EU", "DE"],
                      certifications=["ISO9001", "ISO27001", "ISO14001", "TISAX"],
                      known_evidence=["CE process"], target=["CRA"])
 hyp = resolve_for_certifications(inp.certifications, _lib)
 res = advisor_start(inp, hyp, req, target_id="CRA", covers_targets=covers, corpus_status={"CRA": "validated"})
 w("# Smart Onboarding Advisor — was der Nutzer sieht (automatisch, ohne Vertrieb)")
@@ -0,0 +1,75 @@
 """Certification Capability Hypotheses — capability-centric library + empirical confidence.
 Pins the reuse design (one capability, many supporting certs -> ~40-60 hypotheses, not ~300), the
 automatic multi-certification merge, the empirical (computed) confidence loop, and the Welt-1 guarantee
 that capabilities NO cert suggests (SBOM, signed updates, CVD) are never inferred -> they stay in the
 delta and get asked. Then the Advisor consumes the resolved library end-to-end.
 """
 from __future__ import annotations
 import os
 import yaml
 from compliance.onboarding import (
    CapabilityHypothesis,
    HypothesisObservations,
    OnboardingInput,
    advisor_start,
    empirical_confidence,
    inferred_hypotheses,
    record_observation,
    resolve_for_certifications,
 )
 from compliance.transition_reasoning import TargetRequirement
 _DIR = os.path.dirname(__file__)
 # Load the curated hypothesis library once for the whole module. The `with` block closes the file
 # handle deterministically instead of leaving it to the garbage collector.
 with open(
    os.path.join(_DIR, "..", "knowledge", "certification_hypotheses", "hypotheses.yaml"), encoding="utf-8"
 ) as _fh:
    _LIB = [CapabilityHypothesis(**h) for h in yaml.safe_load(_fh)["hypotheses"]]
 def test_library_is_capability_centric_and_reuses_certs():
    """Pin the reuse design: one capability entry carries many supporting certs, library stays small."""
    document_control = next(h for h in _LIB if h.capability == "document_and_change_control")
    supporting_certs = document_control.supported_by
    # document control is written once and supported by many certifications
    assert len(supporting_certs) >= 4
    # the whole library stays small (reuse), not ~300 per-cert entries
    library_size = len(_LIB)
    assert library_size <= 60
 def test_multi_certification_merges_automatically():
    """A company holding several certs gets the union of their hypotheses, each capability once."""
    held = ["ISO9001", "ISO14001", "TISAX"]
    inferred = {hyp.capability for hyp in inferred_hypotheses(held, _LIB)}
    assert "document_and_change_control" in inferred             # ISO9001 + TISAX
    assert "information_security_management" in inferred         # TISAX
    assert "environmental_management_documentation" in inferred  # ISO14001
    # SBOM / signed updates are suggested by NO certificate -> never inferred
    assert inferred.isdisjoint({"sbom_creation", "secure_signed_update_distribution"})
 def test_empirical_confidence_is_computed_not_assigned():
    """Confidence is None with no observations and afterwards equals the observed confirmation rate."""
    tally = HypothesisObservations()
    assert empirical_confidence(tally) is None      # null until observed
    for outcome in (True, True, False):
        tally = record_observation(tally, outcome)
    assert empirical_confidence(tally) == 0.67      # 2 / 3, from observations only
 def test_resolve_adapts_to_advisor_input():
    """The resolver yields the Advisor's `cert -> [capability]` shape for the held certifications."""
    by_cert = resolve_for_certifications(["ISO27001", "ISO9001"], _LIB)
    expected = (
        ("ISO27001", "incident_management"),
        ("ISO9001", "document_and_change_control"),
    )
    for cert, capability in expected:
        assert capability in by_cert[cert]
 def test_advisor_consumes_the_library_end_to_end():
    """Feed the resolved library into `advisor_start` together with the CRA transition pattern.

    Checks that the Advisor returns inferred assumptions and next-best questions, and that ISO14001
    shows up among the rejected assumptions for the CRA target.
    """
    pattern_path = os.path.join(_DIR, "..", "knowledge", "transition_patterns",
                                "transition_pattern_iso27001_to_cra_maschinenvo_v1.yaml")
    # context manager so the pattern file handle is closed as soon as it has been parsed
    with open(pattern_path, encoding="utf-8") as fh:
        cra = yaml.safe_load(fh)
    req = [TargetRequirement(capability_id=a["capability"]) for a in cra["likely_covered"]]
    req += [TargetRequirement(capability_id=d["capability"], expected_evidence=d.get("expected_evidence", []))
            for d in cra["delta_requirements"]]
    inp = OnboardingInput(company="x", certifications=["ISO27001", "TISAX", "ISO9001", "ISO14001"], target=["CRA"])
    hyp = resolve_for_certifications(inp.certifications, _LIB)        # library -> advisor input
    res = advisor_start(inp, hyp, req, target_id="CRA", corpus_status={"CRA": "validated"})
    assert res.inferred_assumptions and res.next_best_questions
    assert any(r.certification == "ISO14001" for r in res.rejected_assumptions)  # not relevant to CRA