Merge pull request 'feat: company capability profile foundation' (#4) from feat/company-intelligence-2a into main
CI / detect-changes (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 10s
CI / validate-canonical-controls (push) Successful in 5s
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 23s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

This commit is contained in:
pilotadmin
2026-06-26 15:13:21 +02:00
5 changed files with 480 additions and 0 deletions
@@ -0,0 +1,46 @@
"""Company Intelligence (Phase 2A) — Company Capability Profile foundation.
The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation
-> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile
with a four-state trust model (declared/inferred/confirmed/unknown). A certification
yields at most an INFERRED candidate — never "erfuellt" (German: "fulfilled").
Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability
mapping (Execution-owned) via an injected contract — no mapping data in product code.
"""
from __future__ import annotations
from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING
from .engine import build_company_profile
from .schemas import (
CapabilityEvidence,
Certification,
CompanyCapabilityProfile,
CompanyContext,
Declaration,
ExistingEvidence,
ExistingProcess,
ExistingSystem,
OperationalCapability,
OperationalCapabilityCandidate,
VerificationStatus,
)
# Public API of the package; names are re-exported from the submodules imported above.
__all__ = [
    # engine entry point
    "build_company_profile",
    # domain objects from schemas
    "CompanyContext",
    "CompanyCapabilityProfile",
    "Certification",
    "Declaration",
    "ExistingProcess",
    "ExistingSystem",
    "ExistingEvidence",
    "CapabilityEvidence",
    "OperationalCapabilityCandidate",
    "OperationalCapability",
    "VerificationStatus",
    # consumption contract for the injected mapping
    "CapabilityMappingEntry",
    "CertificationCapabilityMap",
    "EMPTY_MAPPING",
]
@@ -0,0 +1,43 @@
"""Consumption contract for the Certification -> Capability mapping.
OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the
Certification->Capability / Feature->Capability mapping RULES live in the Compliance
Execution domain. This Reasoning layer defines ONLY the shape it consumes and never
ships mapping DATA in product code — tests inject mocks, so the real table can only
ever live in Execution.
Execution will eventually provide CapabilityRegistry / CapabilityMapping /
CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate`
{capability_id, source, confidence, verification_status} (see schemas.py) and the
minimal mapping SHAPE below — nothing more.
Python 3.9 compatible (no `|` unions).
"""
from __future__ import annotations
from typing import Dict, List
from pydantic import BaseModel, Field
from compliance.reasoning.enums import Confidence
class CapabilityMappingEntry(BaseModel):
    """Shape of a single rule: one certification implies candidate capabilities.

    This is a contract type only. Which capabilities a given certification
    (e.g. ISO27001) implies is DATA owned by Compliance Execution; it MUST NOT
    be hard-coded here or anywhere else in product code.
    """

    # Capability ids implied by the certification; empty unless supplied.
    capability_ids: List[str] = Field(default_factory=list)
    # Confidence level carried by this rule; defaults to MEDIUM.
    confidence: Confidence = Confidence.MEDIUM
# Type alias for the injected table: certification_id -> mapping entry.
# The table is supplied at call time; product code holds NO entries.
CertificationCapabilityMap = Dict[str, CapabilityMappingEntry]

# Intentionally empty: without an injected mapping there are zero inferred
# candidates. This is the architectural guarantee that the registry lives only in
# the Compliance Execution domain.
# NOTE(review): this is a plain module-level dict, so nothing here stops a caller
# from adding entries to it — confirm all consumers treat it as read-only.
EMPTY_MAPPING: CertificationCapabilityMap = {}
@@ -0,0 +1,114 @@
"""Company Intelligence engine (Phase 2A) — build the Company Capability Profile.
Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence,
candidates and (only via explicit verification) confirmed capabilities.
HARD RULE enforced here: a certification yields at most an INFERRED candidate; it
can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence
(`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without
a known mapping yield evidence-of-claim but NO inferred capability (the mapping is
Execution's data, injected — never hard-coded here).
Python 3.9 compatible (no `|` unions).
"""
from __future__ import annotations
from typing import Dict, List, Optional, Tuple
from compliance.reasoning.enums import Confidence
from .contract import EMPTY_MAPPING, CertificationCapabilityMap
from .schemas import (
CapabilityEvidence,
CompanyCapabilityProfile,
CompanyContext,
OperationalCapability,
OperationalCapabilityCandidate,
VerificationStatus,
)
def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]:
    """Turn each customer declaration into a DECLARED capability candidate.

    Every candidate carries MEDIUM confidence and a source derived from the
    company id ("declaration:<company_id>"), in the order of
    `context.declarations`.
    """
    # The provenance string depends only on the company, so build it once.
    provenance = "declaration:%s" % context.company_id
    return [
        OperationalCapabilityCandidate(
            capability_id=declaration.capability_id,
            source=provenance,
            confidence=Confidence.MEDIUM,
            verification_status=VerificationStatus.DECLARED,
        )
        for declaration in context.declarations
    ]
def _from_certifications(
    context: CompanyContext, mapping: CertificationCapabilityMap
) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]:
    """Derive evidence-of-claim and INFERRED candidates from certifications.

    Each certification always produces one `CapabilityEvidence` (the claim that
    the company holds it). Candidates are produced only when `mapping` has an
    entry for the certification id; they take the entry's confidence and are
    always marked INFERRED.

    Returns:
        A `(evidence, inferred_candidates)` pair, both in certification order.
    """
    claims: List[CapabilityEvidence] = []
    candidates: List[OperationalCapabilityCandidate] = []

    for certification in context.certifications:
        cert_id = certification.certification_id
        provenance = "certification:%s" % cert_id
        # Fall back to the id when no human-readable name was given.
        label = certification.name or cert_id

        claims.append(
            CapabilityEvidence(
                source=provenance,
                claim="Company holds %s" % label,
                certification_id=cert_id,
            )
        )

        rule = mapping.get(cert_id)
        if rule is not None:
            # A known mapping rule exists: one INFERRED candidate per implied capability.
            candidates.extend(
                OperationalCapabilityCandidate(
                    capability_id=capability_id,
                    source=provenance,
                    confidence=rule.confidence,
                    verification_status=VerificationStatus.INFERRED,
                )
                for capability_id in rule.capability_ids
            )
        # Otherwise: no mapping known -> NO inferred capability (data is Execution's).

    return claims, candidates
def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]:
    """Build CONFIRMED capabilities from evidence that names a capability.

    Evidence items whose `proves_capability_id` is empty or None are ignored.
    The remaining items are grouped per capability; each group becomes one
    `OperationalCapability` with HIGH confidence whose `sources` are the
    evidence ids in input order.
    """
    evidence_by_capability: Dict[str, List[str]] = {}
    for item in context.evidence:
        capability_id = item.proves_capability_id
        if capability_id:
            evidence_by_capability.setdefault(capability_id, []).append(item.evidence_id)

    confirmed: List[OperationalCapability] = []
    for capability_id, evidence_ids in evidence_by_capability.items():
        confirmed.append(
            OperationalCapability(
                capability_id=capability_id,
                verification_status=VerificationStatus.CONFIRMED,
                confidence=Confidence.HIGH,
                sources=evidence_ids,
            )
        )
    return confirmed
def build_company_profile(
    context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None
) -> CompanyCapabilityProfile:
    """Assemble the Company Capability Profile from raw context and a mapping.

    Args:
        context: Raw company inputs (certifications, declarations, evidence ...).
        mapping: Injected certification -> capability table. When omitted, the
            empty mapping is used, so no inferred candidates are produced.

    Returns:
        A profile holding the certification claims, the remaining candidates
        (declared first, then inferred) and the confirmed capabilities. A
        capability that is confirmed is removed from the candidate list.
    """
    effective_mapping = mapping if mapping is not None else EMPTY_MAPPING

    capability_evidence, inferred = _from_certifications(context, effective_mapping)
    declared = _declared(context)
    confirmed = _confirmed_from_evidence(context)

    # Once confirmed, a capability is no longer a mere candidate.
    already_confirmed = {capability.capability_id for capability in confirmed}
    remaining: List[OperationalCapabilityCandidate] = []
    for candidate in declared + inferred:
        if candidate.capability_id in already_confirmed:
            continue
        remaining.append(candidate)

    return CompanyCapabilityProfile(
        company_id=context.company_id,
        capability_evidence=capability_evidence,
        candidate_capabilities=remaining,
        confirmed_capabilities=confirmed,
    )
@@ -0,0 +1,150 @@
"""Company Intelligence (Phase 2A) — Company Capability Profile (domain objects).
This is the HEAD of the spine
Company -> (Operational) Capability -> Product -> Applicable Regulation ->
Obligation -> Procedure -> Evidence
and answers a DIFFERENT question than Regulatory Intelligence: not "which laws
apply to my product" but "which capabilities does my company already have, and
which regulatory obligations might they already cover".
HARD RULE (structural, not convention): a capability derived from a certification
is at most INFERRED — never CONFIRMED, never "erfuellt" (fulfilled). A certification produces
EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked
evidence produces a CONFIRMED capability. This keeps the company side inside
Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a
conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution).
OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine.
It does NOT own the Certification->Capability mapping RULES — those are the same
kind of rule as Feature->Capability and belong to the Compliance Execution
Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate`
{capability_id, source, confidence, verification_status} via an injected mapping
(see contract.py). No mapping DATA lives in product code (tests inject mocks).
Application/reasoning types, NOT compliance-meta-model classes (architecture
freeze v1.0 untouched). Python 3.9 compatible (no `|` unions).
"""
from __future__ import annotations
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
from compliance.reasoning.enums import Confidence
class VerificationStatus(str, Enum):
    """How well-established a company capability is (string-valued enum).

    Intended as a fourth vocabulary, kept separate from ClaimCoverage
    (customer claim vs obligation), ComplianceStatus (verified conformity) and
    DeltaType (RCI). It describes only the trust placed in a CAPABILITY and
    never states whether an obligation is met.

    Progression: DECLARED (the customer says so) -> INFERRED (a certification
    implies it) -> CONFIRMED (checked against real evidence). UNKNOWN means
    there is no signal at all.
    """

    DECLARED = "declared"
    INFERRED = "inferred"
    CONFIRMED = "confirmed"
    UNKNOWN = "unknown"
# ── raw company inputs (the CompanyContext children) ──────────────────────
class Certification(BaseModel):
    """A certification the company states it holds (raw input)."""

    # Identifier of the certification, e.g. "ISO27001".
    certification_id: str
    # Optional display name; empty when not provided.
    name: str = ""
    # What the certification covers, as stated by the customer.
    scope: str = ""
class Declaration(BaseModel):
    """The customer's own statement of having a capability.

    Example statement: "we do patch management".
    """

    # The capability the customer claims to have.
    capability_id: str
    # Optional free-text wording of the claim.
    statement: str = ""
class ExistingProcess(BaseModel):
    """A process the company already has (raw input record)."""

    # Identifier of the process.
    process_id: str
    # Optional display name; empty when not provided.
    name: str = ""
class ExistingSystem(BaseModel):
    """A system the company already has (raw input record)."""

    # Identifier of the system.
    system_id: str
    # Optional display name; empty when not provided.
    name: str = ""
class ExistingEvidence(BaseModel):
    """A concrete artefact the company already holds (policy, audit log, SBOM ...).

    `proves_capability_id` is the ONLY field that may lift a capability to
    CONFIRMED, and it is meant to be set only once a human or engine has
    attached real evidence.
    """

    # Identifier of the artefact.
    evidence_id: str
    # Kind of artefact: config_export/test_report/policy/audit_log/...
    evidence_type: str = ""
    # Capability this artefact proves; None when it proves none.
    proves_capability_id: Optional[str] = None
# ── intermediate: certification -> evidence-of-capability (refinement 1) ──
class CapabilityEvidence(BaseModel):
    """Evidence-of-claim produced by a certification, not a capability itself.

    "Company holds a certified ISMS" is the evidence/claim. Capabilities are
    only ever INFERRED from it through the injected (Execution-owned) mapping,
    never read off the certification directly.
    """

    # Provenance of the claim, e.g. "certification:ISO27001".
    source: str
    # Human-readable wording of the claim.
    claim: str = ""
    # Certification the claim stems from; empty when not applicable.
    certification_id: str = ""
# ── consumed contract type (refinement 2) ─────────────────────────────────
class OperationalCapabilityCandidate(BaseModel):
    """The single type Reasoning consumes from Execution's capability mapping.

    "Operational" means an organisational ability, which keeps it distinct from
    later Product/AI/Safety capabilities. By design a candidate is DECLARED or
    INFERRED and never CONFIRMED on its own.
    NOTE(review): that restriction is not validated by this model; it appears
    to depend on how callers construct candidates — confirm.
    """

    # The capability this candidate refers to.
    capability_id: str
    # Provenance of the signal that produced the candidate.
    source: str
    # Confidence in the candidate; MEDIUM unless stated otherwise.
    confidence: Confidence = Confidence.MEDIUM
    # Trust state; INFERRED unless stated otherwise.
    verification_status: VerificationStatus = VerificationStatus.INFERRED
class OperationalCapability(BaseModel):
    """A capability the company actually has, CONFIRMED against real evidence."""

    # The capability that was established.
    capability_id: str
    # Trust state; required, there is no default.
    verification_status: VerificationStatus
    # Confidence in the capability; MEDIUM unless stated otherwise.
    confidence: Confidence = Confidence.MEDIUM
    # Identifiers of whatever backs the capability; empty by default.
    sources: List[str] = Field(default_factory=list)
# ── the container Reasoning OWNS (raw inputs) ─────────────────────────────
class CompanyContext(BaseModel):
    """Container of the raw company inputs that a profile is built from.

    Only `company_id` is required; every collection defaults to an empty list.
    """

    company_id: str
    # Certifications the company states it holds.
    certifications: List[Certification] = Field(default_factory=list)
    # Capabilities the customer claims to have.
    declarations: List[Declaration] = Field(default_factory=list)
    # Processes already in place.
    processes: List[ExistingProcess] = Field(default_factory=list)
    # Systems already in place.
    systems: List[ExistingSystem] = Field(default_factory=list)
    # Concrete artefacts the company already holds.
    evidence: List[ExistingEvidence] = Field(default_factory=list)
# ── derived view (the Company Capability Profile) ─────────────────────────
class CompanyCapabilityProfile(BaseModel):
    """Derived view: capability evidence, candidates and confirmed capabilities.

    Entries in `candidate_capabilities` (declared/inferred) are NEVER promoted
    to `confirmed_capabilities` automatically; only explicit ExistingEvidence
    does that. The rule itself is enforced in engine.py, not by this model.
    """

    company_id: str
    # Claims derived from certifications.
    capability_evidence: List[CapabilityEvidence] = Field(default_factory=list)
    # Declared and inferred candidates.
    candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list)
    # Capabilities confirmed by evidence.
    confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list)
+127
View File
@@ -0,0 +1,127 @@
"""Tests for Company Intelligence (Phase 2A) — Company Capability Profile.
Acceptance: from a CompanyContext (certifications, declarations, evidence) the
engine derives operational capabilities with a four-state trust model and a HARD
RULE: a certification is NEVER auto-treated as "erfuellt" (fulfilled) — at most INFERRED.
The Certification->Capability mapping is Execution's domain. It is injected here as
a MOCK (the yaml-like dict below lives ONLY in tests); product code ships no table.
"""
from __future__ import annotations
from compliance.company import (
CapabilityMappingEntry,
Certification,
CompanyContext,
Declaration,
ExistingEvidence,
VerificationStatus,
build_company_profile,
)
from compliance.reasoning.enums import Confidence
# --- MOCK mapping (Execution-owned in reality; here only for the tests) -------
# mapping:
# ISO27001 -> [cap_patch_management, cap_supplier_management]
# Test-only stand-in for the Execution-owned table: ISO27001 implies two
# capabilities at MEDIUM confidence.
MOCK_MAPPING = {
    "ISO27001": CapabilityMappingEntry(
        confidence=Confidence.MEDIUM,
        capability_ids=[
            "cap_patch_management",
            "cap_supplier_management",
        ],
    ),
}
def _candidate(profile, capability_id):
return [c for c in profile.candidate_capabilities if c.capability_id == capability_id]
def _confirmed_ids(profile):
return {c.capability_id for c in profile.confirmed_capabilities}
# A certification yields INFERRED candidates via the injected mapping.
def test_certification_infers_candidates_via_injected_mapping():
    """With the mock mapping, ISO27001 yields exactly the mapped INFERRED candidates."""
    iso_cert = Certification(certification_id="ISO27001")
    context = CompanyContext(company_id="acme", certifications=[iso_cert])

    profile = build_company_profile(context, MOCK_MAPPING)
    candidates = profile.candidate_capabilities

    expected_ids = {"cap_patch_management", "cap_supplier_management"}
    assert {candidate.capability_id for candidate in candidates} == expected_ids
    for candidate in candidates:
        assert candidate.verification_status == VerificationStatus.INFERRED
        assert candidate.source == "certification:ISO27001"
# Without an injected mapping there are NO inferred capabilities — only the claim.
# This is the architectural guarantee that the table lives only in Execution.
def test_no_mapping_no_inferred_capabilities():
    """Without a mapping argument there are no candidates, only the certification claim."""
    iso_cert = Certification(certification_id="ISO27001")
    context = CompanyContext(company_id="acme", certifications=[iso_cert])

    # No mapping passed: the engine falls back to its empty default.
    profile = build_company_profile(context)

    assert profile.candidate_capabilities == []
    # The certification still produced exactly one evidence-of-claim record.
    assert len(profile.capability_evidence) == 1
    claim = profile.capability_evidence[0]
    assert claim.source == "certification:ISO27001"
    assert claim.certification_id == "ISO27001"
# A customer declaration yields a DECLARED candidate.
def test_declaration_yields_declared_candidate():
    """A single declaration produces exactly one DECLARED candidate."""
    declaration = Declaration(capability_id="cap_patch_management")
    context = CompanyContext(company_id="acme", declarations=[declaration])

    profile = build_company_profile(context, MOCK_MAPPING)

    matches = _candidate(profile, "cap_patch_management")
    assert len(matches) == 1
    assert matches[0].verification_status == VerificationStatus.DECLARED
# declared + inferred coexist as distinct signals for the same capability.
def test_declared_and_inferred_coexist():
    """A capability both declared and implied by a certification keeps both signals."""
    context = CompanyContext(
        company_id="acme",
        certifications=[Certification(certification_id="ISO27001")],
        declarations=[Declaration(capability_id="cap_patch_management")],
    )

    profile = build_company_profile(context, MOCK_MAPPING)

    seen_statuses = set()
    for candidate in _candidate(profile, "cap_patch_management"):
        seen_statuses.add(candidate.verification_status)
    assert seen_statuses == {VerificationStatus.DECLARED, VerificationStatus.INFERRED}
# HARD RULE: a certification alone NEVER yields a confirmed capability.
def test_hard_rule_certification_never_confirmed():
    """A certification on its own produces no confirmed capability at all."""
    iso_cert = Certification(certification_id="ISO27001")
    context = CompanyContext(company_id="acme", certifications=[iso_cert])

    profile = build_company_profile(context, MOCK_MAPPING)

    assert _confirmed_ids(profile) == set()
    # No candidate may carry the CONFIRMED state either.
    for candidate in profile.candidate_capabilities:
        assert candidate.verification_status != VerificationStatus.CONFIRMED
# Only real evidence confirms a capability — and it leaves the candidate list.
def test_evidence_confirms_capability():
    """Evidence naming a capability confirms it and removes it from the candidates."""
    policy = ExistingEvidence(
        evidence_id="pol-1",
        evidence_type="policy",
        proves_capability_id="cap_patch_management",
    )
    context = CompanyContext(
        company_id="acme",
        certifications=[Certification(certification_id="ISO27001")],
        evidence=[policy],
    )

    profile = build_company_profile(context, MOCK_MAPPING)

    assert "cap_patch_management" in _confirmed_ids(profile)
    matching = [
        capability
        for capability in profile.confirmed_capabilities
        if capability.capability_id == "cap_patch_management"
    ]
    confirmed = matching[0]
    assert confirmed.verification_status == VerificationStatus.CONFIRMED
    assert confirmed.confidence == Confidence.HIGH
    assert confirmed.sources == ["pol-1"]

    # A confirmed capability is no longer a mere candidate.
    assert _candidate(profile, "cap_patch_management") == []
    # The capability without evidence stays an inferred candidate.
    supplier = _candidate(profile, "cap_supplier_management")[0]
    assert supplier.verification_status == VerificationStatus.INFERRED
# The four-state vocabulary exists and is ordered declared->inferred->confirmed (+unknown).
def test_four_states_present():
    """The vocabulary has exactly four states, in progression order.

    Enum iteration follows definition order, so comparing the ordered list
    checks both that the four states exist and that they are declared as
    declared -> inferred -> confirmed, with unknown last. A set comparison
    would only check that the states are present.
    """
    assert [state.value for state in VerificationStatus] == [
        "declared",
        "inferred",
        "confirmed",
        "unknown",
    ]
# verification_status is a FOURTH vocabulary, disjoint from ClaimCoverage and DeltaType.
def test_verification_status_distinct_vocabulary():
    """VerificationStatus values share nothing with ClaimCoverage or DeltaType values."""
    from compliance.rci.schemas import DeltaType
    from compliance.reasoning.enums import ClaimCoverage

    verification_values = {status.value for status in VerificationStatus}
    claim_coverage_values = {coverage.value for coverage in ClaimCoverage}
    delta_type_values = {delta.value for delta in DeltaType}

    assert verification_values.isdisjoint(claim_coverage_values)
    assert verification_values.isdisjoint(delta_type_values)