feat: Certification Capability Hypotheses — capability-centric library + empirical confidence
The bottleneck is knowledge, not the endpoint. This builds the knowledge the Onboarding Advisor needs, restructured per the user's key insight: NOT "ISO27001 -> 30 capabilities" but each hypothesis as its own object "capability -> supported_by: [certs]". A capability is written ONCE with all supporting certs, so the shared management-system core (document control, incident, supplier, audit, access, asset, monitoring, training, crypto, release, risk) covers most certifications with ~18 hypotheses instead of ~300 — and multi-certification merges AUTOMATICALLY (a company's inferred caps = every hypothesis whose supported_by intersects its certs). Welt-1 throughout: "IF cert present, EXPECT capability (verification required)", never "erfüllt". Capabilities NO cert suggests (SBOM, signed updates, CVD, support period) have no hypothesis -> they stay in the delta and get asked. confidence is EMPIRICAL: computed from real-onboarding observations (confirmed/(confirmed+refuted)), None until calibrated — never an LLM/expert score (record_observation + empirical_confidence). The long-term moat: knowledge that learns from reality, not from a norm. compliance/onboarding/hypotheses.py (resolve_for_certifications / inferred_hypotheses / empirical_confidence / record_observation) feeds the existing advisor_start unchanged; the demo now runs on the curated library. Pure, mypy --strict clean, library is DATA (no norm text, no real names). Non-runtime -> no deploy. 12 tests pass, check-loc 0.
This commit is contained in:
@@ -9,6 +9,14 @@ meta-model; certificate->capability hypotheses and target requirements are INJEC
|
||||
from __future__ import annotations
|
||||
|
||||
from .engine import advisor_start, apply_answer
|
||||
from .hypotheses import (
|
||||
CapabilityHypothesis,
|
||||
HypothesisObservations,
|
||||
empirical_confidence,
|
||||
inferred_hypotheses,
|
||||
record_observation,
|
||||
resolve_for_certifications,
|
||||
)
|
||||
from .schemas import (
|
||||
AdvisorMeasure,
|
||||
AdvisorQuestion,
|
||||
@@ -27,4 +35,10 @@ __all__ = [
|
||||
"AdvisorMeasure",
|
||||
"InferredAssumption",
|
||||
"RejectedAssumption",
|
||||
"CapabilityHypothesis",
|
||||
"HypothesisObservations",
|
||||
"empirical_confidence",
|
||||
"record_observation",
|
||||
"inferred_hypotheses",
|
||||
"resolve_for_certifications",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
"""Certification Capability Hypotheses — capability-centric, with EMPIRICAL (computed) confidence.
|
||||
|
||||
Each hypothesis is its own knowledge object: "IF a company holds one of `supported_by` certs, we EXPECT
|
||||
`capability` (verification required)" — Welt-1, never "erfüllt". Written ONCE per capability with a list
|
||||
of supporting certs (reuse, not redundancy), so multi-certification merges AUTOMATICALLY.
|
||||
|
||||
`confidence` is NOT an expert/LLM score: it is COMPUTED from real-onboarding observations
|
||||
(confirmed / (confirmed+refuted)), `None` until any are seen. This is the empirical learning loop — the
|
||||
long-term moat. The library is DATA, loaded outside this module and injected. Python 3.9 compatible.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, Sequence
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class HypothesisObservations(BaseModel):
    """Tally of real-onboarding outcomes recorded for one hypothesis.

    Both counters default to 0 and are validated as non-negative. A negative
    count would let the ratio confirmed / (confirmed + refuted) fall outside
    [0, 1], or make the total cancel to zero even though observations exist.
    """

    # Number of observations in which the hypothesis was confirmed.
    confirmed: int = Field(default=0, ge=0)
    # Number of observations in which the hypothesis was refuted.
    refuted: int = Field(default=0, ge=0)
|
||||
|
||||
|
||||
class CapabilityHypothesis(BaseModel):
    """A single capability hypothesis and the certifications that suggest it.

    The hypothesis is keyed by capability and lists its supporting
    certifications in `supported_by`, so one object serves every
    certification that points at the same capability.
    """

    id: str
    capability: str
    # Certifications that suggest this capability.
    supported_by: List[str] = Field(default_factory=list)
    # Either "supports" or "partially_supports".
    relationship: str = Field(default="supports")
    # Welt-1: a hypothesis is never auto-satisfied.
    verification_required: bool = Field(default=True)
    question_intent: str = Field(default="verify_existence")
    expected_evidence: List[str] = Field(default_factory=list)
    # Confirmed/refuted tallies; starts as a fresh HypothesisObservations.
    observations: HypothesisObservations = Field(default_factory=HypothesisObservations)
    # Either "shared" or "specific".
    kind: str = Field(default="shared")
|
||||
|
||||
|
||||
def empirical_confidence(obs: HypothesisObservations) -> Optional[float]:
    """Return the share of confirmed observations, rounded to two decimals.

    The value is confirmed / (confirmed + refuted). When the two counters sum
    to zero there is nothing to divide by, and the result is None.
    """
    total = obs.confirmed + obs.refuted
    if not total:
        # No observations recorded yet: confidence is undefined, not 0.
        return None
    ratio = obs.confirmed / total
    return round(ratio, 2)
|
||||
|
||||
|
||||
def record_observation(obs: HypothesisObservations, confirmed: bool) -> HypothesisObservations:
    """Return a new tally with one more observation added.

    A truthy `confirmed` increments the confirmed counter; otherwise the
    refuted counter is incremented. The `obs` argument is not modified.
    """
    if confirmed:
        return HypothesisObservations(
            confirmed=obs.confirmed + 1,
            refuted=obs.refuted,
        )
    return HypothesisObservations(
        confirmed=obs.confirmed,
        refuted=obs.refuted + 1,
    )
|
||||
|
||||
|
||||
def inferred_hypotheses(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> List[CapabilityHypothesis]:
    """Select the hypotheses supported by at least one of the given certifications.

    A hypothesis is kept when any entry of its `supported_by` appears in
    `certifications`. Results keep the order of `library`.
    """
    held = set(certifications)
    return [
        hypothesis
        for hypothesis in library
        if any(cert in held for cert in hypothesis.supported_by)
    ]
|
||||
|
||||
|
||||
def resolve_for_certifications(
    certifications: Sequence[str], library: Sequence[CapabilityHypothesis]
) -> Dict[str, List[str]]:
    """Turn the capability-centric library into a `cert -> [capability]` mapping.

    Only certifications present in `certifications` become keys, and only when
    at least one hypothesis lists them in `supported_by`. Each capability
    appears once per certification, in library order; keys are sorted.
    """
    held = set(certifications)
    by_cert: Dict[str, List[str]] = {}
    for hypothesis in library:
        for cert in hypothesis.supported_by:
            if cert not in held:
                continue
            capabilities = by_cert.setdefault(cert, [])
            # Skip repeats so each capability is listed once per certification.
            if hypothesis.capability not in capabilities:
                capabilities.append(hypothesis.capability)
    # Keys are unique, so sorting the items orders purely by certification name.
    return dict(sorted(by_cert.items()))
|
||||
Reference in New Issue
Block a user