diff --git a/backend-compliance/compliance/company/__init__.py b/backend-compliance/compliance/company/__init__.py new file mode 100644 index 00000000..39a35f1b --- /dev/null +++ b/backend-compliance/compliance/company/__init__.py @@ -0,0 +1,46 @@ +"""Company Intelligence (Phase 2A) — Company Capability Profile foundation. + +The HEAD of the spine Company -> Capability -> Product -> Regulation -> Obligation +-> Procedure -> Evidence. Builds a CompanyContext into a CompanyCapabilityProfile +with a four-state trust model (declared/inferred/confirmed/unknown). A certification +yields at most an INFERRED candidate — never "erfuellt". + +Reasoning OWNS the container + trust-state; it CONSUMES the Certification->Capability +mapping (Execution-owned) via an injected contract — no mapping data in product code. +""" + +from __future__ import annotations + +from .contract import CapabilityMappingEntry, CertificationCapabilityMap, EMPTY_MAPPING +from .engine import build_company_profile +from .schemas import ( + CapabilityEvidence, + Certification, + CompanyCapabilityProfile, + CompanyContext, + Declaration, + ExistingEvidence, + ExistingProcess, + ExistingSystem, + OperationalCapability, + OperationalCapabilityCandidate, + VerificationStatus, +) + +__all__ = [ + "build_company_profile", + "CompanyContext", + "CompanyCapabilityProfile", + "Certification", + "Declaration", + "ExistingProcess", + "ExistingSystem", + "ExistingEvidence", + "CapabilityEvidence", + "OperationalCapabilityCandidate", + "OperationalCapability", + "VerificationStatus", + "CapabilityMappingEntry", + "CertificationCapabilityMap", + "EMPTY_MAPPING", +] diff --git a/backend-compliance/compliance/company/contract.py b/backend-compliance/compliance/company/contract.py new file mode 100644 index 00000000..89e9459d --- /dev/null +++ b/backend-compliance/compliance/company/contract.py @@ -0,0 +1,43 @@ +"""Consumption contract for the Certification -> Capability mapping. 
+ +OWNERSHIP BOUNDARY (hard): the Capability Registry, CapabilityDefinition and the +Certification->Capability / Feature->Capability mapping RULES live in the Compliance +Execution domain. This Reasoning layer defines ONLY the shape it consumes and never +ships mapping DATA in product code — tests inject mocks, so the real table can only +ever live in Execution. + +Execution will eventually provide CapabilityRegistry / CapabilityMapping / +CapabilityDefinition; Reasoning consumes exactly `OperationalCapabilityCandidate` +{capability_id, source, confidence, verification_status} (see schemas.py) and the +minimal mapping SHAPE below — nothing more. + +Python 3.9 compatible (no `|` unions). +""" + +from __future__ import annotations + +from typing import Dict, List + +from pydantic import BaseModel, Field + +from compliance.reasoning.enums import Confidence + + +class CapabilityMappingEntry(BaseModel): + """One mapping rule SHAPE: a certification implies candidate capabilities. + + Contract type only. The actual table (which capabilities ISO27001 implies) is + Execution's DATA and MUST NOT be hard-coded here or anywhere in product code. + """ + + capability_ids: List[str] = Field(default_factory=list) + confidence: Confidence = Confidence.MEDIUM + + +# certification_id -> entry. Injected at call time; product code holds NO entries. +CertificationCapabilityMap = Dict[str, CapabilityMappingEntry] + +# Intentionally empty: without an injected mapping there are zero inferred +# candidates. This is the architectural guarantee that the registry lives only in +# the Compliance Execution domain. +EMPTY_MAPPING: CertificationCapabilityMap = {} diff --git a/backend-compliance/compliance/company/engine.py b/backend-compliance/compliance/company/engine.py new file mode 100644 index 00000000..377aa170 --- /dev/null +++ b/backend-compliance/compliance/company/engine.py @@ -0,0 +1,114 @@ +"""Company Intelligence engine (Phase 2A) — build the Company Capability Profile. 
+ +Deterministic, no LLM/RAG. Turns a raw CompanyContext into capability evidence, +candidates and (only via explicit verification) confirmed capabilities. + +HARD RULE enforced here: a certification yields at most an INFERRED candidate; it +can NEVER produce a CONFIRMED capability on its own. Only real ExistingEvidence +(`proves_capability_id`) promotes a capability to CONFIRMED. Certifications without +a known mapping yield evidence-of-claim but NO inferred capability (the mapping is +Execution's data, injected — never hard-coded here). + +Python 3.9 compatible (no `|` unions). +""" + +from __future__ import annotations + +from typing import Dict, List, Optional, Tuple + +from compliance.reasoning.enums import Confidence + +from .contract import EMPTY_MAPPING, CertificationCapabilityMap +from .schemas import ( + CapabilityEvidence, + CompanyCapabilityProfile, + CompanyContext, + OperationalCapability, + OperationalCapabilityCandidate, + VerificationStatus, +) + + +def _declared(context: CompanyContext) -> List[OperationalCapabilityCandidate]: + out: List[OperationalCapabilityCandidate] = [] + for d in context.declarations: + out.append( + OperationalCapabilityCandidate( + capability_id=d.capability_id, + source="declaration:%s" % context.company_id, + confidence=Confidence.MEDIUM, + verification_status=VerificationStatus.DECLARED, + ) + ) + return out + + +def _from_certifications( + context: CompanyContext, mapping: CertificationCapabilityMap +) -> Tuple[List[CapabilityEvidence], List[OperationalCapabilityCandidate]]: + # refinement 1: certification -> evidence-of-capability (claim) -> inferred candidate + evidence: List[CapabilityEvidence] = [] + inferred: List[OperationalCapabilityCandidate] = [] + for cert in context.certifications: + source = "certification:%s" % cert.certification_id + evidence.append( + CapabilityEvidence( + source=source, + claim="Company holds %s" % (cert.name or cert.certification_id), + certification_id=cert.certification_id, + ) + ) + 
entry = mapping.get(cert.certification_id) + if entry is None: + continue # no mapping known -> NO inferred capability (data is Execution's) + for cap_id in entry.capability_ids: + inferred.append( + OperationalCapabilityCandidate( + capability_id=cap_id, + source=source, + confidence=entry.confidence, + verification_status=VerificationStatus.INFERRED, + ) + ) + return evidence, inferred + + +def _confirmed_from_evidence(context: CompanyContext) -> List[OperationalCapability]: + proven: Dict[str, List[str]] = {} + for ev in context.evidence: + cap = ev.proves_capability_id + if not cap: + continue + proven.setdefault(cap, []).append(ev.evidence_id) + return [ + OperationalCapability( + capability_id=cap, + verification_status=VerificationStatus.CONFIRMED, + confidence=Confidence.HIGH, + sources=sources, + ) + for cap, sources in proven.items() + ] + + +def build_company_profile( + context: CompanyContext, mapping: Optional[CertificationCapabilityMap] = None +) -> CompanyCapabilityProfile: + """Build the Company Capability Profile from raw context + an injected mapping. + + `mapping` defaults to EMPTY (no inferred candidates) so that the cert->capability + table can only ever come from the Compliance Execution domain. 
+ """ + mapping = EMPTY_MAPPING if mapping is None else mapping + evidence, inferred = _from_certifications(context, mapping) + declared = _declared(context) + confirmed = _confirmed_from_evidence(context) + confirmed_ids = {oc.capability_id for oc in confirmed} + # a confirmed capability is no longer a mere candidate + candidates = [c for c in (declared + inferred) if c.capability_id not in confirmed_ids] + return CompanyCapabilityProfile( + company_id=context.company_id, + capability_evidence=evidence, + candidate_capabilities=candidates, + confirmed_capabilities=confirmed, + ) diff --git a/backend-compliance/compliance/company/schemas.py b/backend-compliance/compliance/company/schemas.py new file mode 100644 index 00000000..661b9293 --- /dev/null +++ b/backend-compliance/compliance/company/schemas.py @@ -0,0 +1,150 @@ +"""Company Intelligence (Phase 2A) — Company Capability Profile (domain objects). + +This is the HEAD of the spine + + Company -> (Operational) Capability -> Product -> Applicable Regulation -> + Obligation -> Procedure -> Evidence + +and answers a DIFFERENT question than Regulatory Intelligence: not "which laws +apply to my product" but "which capabilities does my company already have, and +which regulatory obligations might they already cover". + +HARD RULE (structural, not convention): a capability derived from a certification +is at most INFERRED — never CONFIRMED, never "erfuellt". A certification produces +EVIDENCE for a capability, an inference produces a CANDIDATE, and only checked +evidence produces a CONFIRMED capability. This keeps the company side inside +Welt 1 (potential), mirroring `ClaimCoverage` on the obligation side; it is NOT a +conformity verdict (`ComplianceStatus`, Welt 2, owned by Compliance Execution). + +OWNERSHIP: Reasoning OWNS this CompanyContext container + the trust-state machine. 
+It does NOT own the Certification->Capability mapping RULES — those are the same +kind of rule as Feature->Capability and belong to the Compliance Execution +Capability Registry. This layer only CONSUMES `OperationalCapabilityCandidate` +{capability_id, source, confidence, verification_status} via an injected mapping +(see contract.py). No mapping DATA lives in product code (tests inject mocks). + +Application/reasoning types, NOT compliance-meta-model classes (architecture +freeze v1.0 untouched). Python 3.9 compatible (no `|` unions). +""" + +from __future__ import annotations + +from enum import Enum +from typing import List, Optional + +from pydantic import BaseModel, Field + +from compliance.reasoning.enums import Confidence + + +class VerificationStatus(str, Enum): + """Trust state of an operational capability — a FOURTH vocabulary. + + Disjoint from ClaimCoverage (Welt 1, customer claim vs obligation), + ComplianceStatus (Welt 2, verified conformity) and DeltaType (RCI). It says + only how well-established a company CAPABILITY is, never whether an obligation + is met. Progression: DECLARED (customer says) -> INFERRED (a certification + implies it) -> CONFIRMED (checked against real evidence); UNKNOWN = no signal. + """ + + DECLARED = "declared" + INFERRED = "inferred" + CONFIRMED = "confirmed" + UNKNOWN = "unknown" + + +# ── raw company inputs (the CompanyContext children) ────────────────────── +class Certification(BaseModel): + certification_id: str # e.g. 
"ISO27001" + name: str = "" + scope: str = "" # what the cert covers, customer-stated + + +class Declaration(BaseModel): + """A customer statement that they have a capability ("we do patch management").""" + + capability_id: str + statement: str = "" + + +class ExistingProcess(BaseModel): + process_id: str + name: str = "" + + +class ExistingSystem(BaseModel): + system_id: str + name: str = "" + + +class ExistingEvidence(BaseModel): + """A concrete artefact the company already holds (policy, audit log, SBOM ...). + + `proves_capability_id` is the ONLY thing that may lift a capability to + CONFIRMED — and only when a human/engine has attached real evidence. + """ + + evidence_id: str + evidence_type: str = "" # config_export/test_report/policy/audit_log/... + proves_capability_id: Optional[str] = None + + +# ── intermediate: certification -> evidence-of-capability (refinement 1) ── +class CapabilityEvidence(BaseModel): + """A certification does not yield a capability directly — only EVIDENCE for one. + + "Company holds a certified ISMS" is the evidence/claim; capabilities are then + INFERRED from it via the injected (Execution-owned) mapping, never directly. + """ + + source: str # provenance, e.g. "certification:ISO27001" + claim: str = "" + certification_id: str = "" + + +# ── consumed contract type (refinement 2) ───────────────────────────────── +class OperationalCapabilityCandidate(BaseModel): + """The ONLY thing Reasoning consumes from Execution's capability mapping. + + Named "operational" (organisational ability) to stay distinct from later + Product/AI/Safety capabilities. A candidate is always Welt 1 — DECLARED or + INFERRED — and never CONFIRMED on its own. 
+ """ + + capability_id: str + source: str + confidence: Confidence = Confidence.MEDIUM + verification_status: VerificationStatus = VerificationStatus.INFERRED + + +class OperationalCapability(BaseModel): + """A capability the company actually has, CONFIRMED against real evidence.""" + + capability_id: str + verification_status: VerificationStatus + confidence: Confidence = Confidence.MEDIUM + sources: List[str] = Field(default_factory=list) + + +# ── the container Reasoning OWNS (raw inputs) ───────────────────────────── +class CompanyContext(BaseModel): + company_id: str + certifications: List[Certification] = Field(default_factory=list) + declarations: List[Declaration] = Field(default_factory=list) + processes: List[ExistingProcess] = Field(default_factory=list) + systems: List[ExistingSystem] = Field(default_factory=list) + evidence: List[ExistingEvidence] = Field(default_factory=list) + + +# ── derived view (the Company Capability Profile) ───────────────────────── +class CompanyCapabilityProfile(BaseModel): + """Derived: capability evidence + candidates (declared/inferred) + confirmed. + + `candidate_capabilities` NEVER auto-promote to `confirmed_capabilities`; only + explicit ExistingEvidence does that. The hard rule is enforced in engine.py. + """ + + company_id: str + capability_evidence: List[CapabilityEvidence] = Field(default_factory=list) + candidate_capabilities: List[OperationalCapabilityCandidate] = Field(default_factory=list) + confirmed_capabilities: List[OperationalCapability] = Field(default_factory=list) diff --git a/backend-compliance/tests/test_company.py b/backend-compliance/tests/test_company.py new file mode 100644 index 00000000..20988184 --- /dev/null +++ b/backend-compliance/tests/test_company.py @@ -0,0 +1,127 @@ +"""Tests for Company Intelligence (Phase 2A) — Company Capability Profile. 
+ +Acceptance: from a CompanyContext (certifications, declarations, evidence) the +engine derives operational capabilities with a four-state trust model and a HARD +RULE: a certification is NEVER auto-treated as "erfuellt" — at most INFERRED. + +The Certification->Capability mapping is Execution's domain. It is injected here as +a MOCK (the yaml-like dict below lives ONLY in tests); product code ships no table. +""" + +from __future__ import annotations + +from compliance.company import ( + CapabilityMappingEntry, + Certification, + CompanyContext, + Declaration, + ExistingEvidence, + VerificationStatus, + build_company_profile, +) +from compliance.reasoning.enums import Confidence + +# --- MOCK mapping (Execution-owned in reality; here only for the tests) ------- +# mapping: +# ISO27001 -> [cap_patch_management, cap_supplier_management] +MOCK_MAPPING = { + "ISO27001": CapabilityMappingEntry( + capability_ids=["cap_patch_management", "cap_supplier_management"], + confidence=Confidence.MEDIUM, + ) +} + + +def _candidate(profile, capability_id): + return [c for c in profile.candidate_capabilities if c.capability_id == capability_id] + + +def _confirmed_ids(profile): + return {c.capability_id for c in profile.confirmed_capabilities} + + +# A certification yields INFERRED candidates via the injected mapping. +def test_certification_infers_candidates_via_injected_mapping(): + ctx = CompanyContext(company_id="acme", certifications=[Certification(certification_id="ISO27001")]) + profile = build_company_profile(ctx, MOCK_MAPPING) + ids = {c.capability_id for c in profile.candidate_capabilities} + assert ids == {"cap_patch_management", "cap_supplier_management"} + for c in profile.candidate_capabilities: + assert c.verification_status == VerificationStatus.INFERRED + assert c.source == "certification:ISO27001" + + +# Without an injected mapping there are NO inferred capabilities — only the claim. +# This is the architectural guarantee that the table lives only in Execution. 
+def test_no_mapping_no_inferred_capabilities(): + ctx = CompanyContext(company_id="acme", certifications=[Certification(certification_id="ISO27001")]) + profile = build_company_profile(ctx) # default EMPTY mapping + assert profile.candidate_capabilities == [] + # the certification still produced evidence-of-claim (refinement 1) + assert len(profile.capability_evidence) == 1 + assert profile.capability_evidence[0].source == "certification:ISO27001" + assert profile.capability_evidence[0].certification_id == "ISO27001" + + +# A customer declaration yields a DECLARED candidate. +def test_declaration_yields_declared_candidate(): + ctx = CompanyContext(company_id="acme", declarations=[Declaration(capability_id="cap_patch_management")]) + profile = build_company_profile(ctx, MOCK_MAPPING) + cands = _candidate(profile, "cap_patch_management") + assert len(cands) == 1 + assert cands[0].verification_status == VerificationStatus.DECLARED + + +# declared + inferred coexist as distinct signals for the same capability. +def test_declared_and_inferred_coexist(): + ctx = CompanyContext( + company_id="acme", + certifications=[Certification(certification_id="ISO27001")], + declarations=[Declaration(capability_id="cap_patch_management")], + ) + profile = build_company_profile(ctx, MOCK_MAPPING) + statuses = {c.verification_status for c in _candidate(profile, "cap_patch_management")} + assert statuses == {VerificationStatus.DECLARED, VerificationStatus.INFERRED} + + +# HARD RULE: a certification alone NEVER yields a confirmed capability. +def test_hard_rule_certification_never_confirmed(): + ctx = CompanyContext(company_id="acme", certifications=[Certification(certification_id="ISO27001")]) + profile = build_company_profile(ctx, MOCK_MAPPING) + assert _confirmed_ids(profile) == set() + for c in profile.candidate_capabilities: + assert c.verification_status != VerificationStatus.CONFIRMED + + +# Only real evidence confirms a capability — and it leaves the candidate list. 
+def test_evidence_confirms_capability(): + ctx = CompanyContext( + company_id="acme", + certifications=[Certification(certification_id="ISO27001")], + evidence=[ExistingEvidence(evidence_id="pol-1", evidence_type="policy", proves_capability_id="cap_patch_management")], + ) + profile = build_company_profile(ctx, MOCK_MAPPING) + assert "cap_patch_management" in _confirmed_ids(profile) + confirmed = [c for c in profile.confirmed_capabilities if c.capability_id == "cap_patch_management"][0] + assert confirmed.verification_status == VerificationStatus.CONFIRMED + assert confirmed.confidence == Confidence.HIGH + assert confirmed.sources == ["pol-1"] + # a confirmed capability is no longer a mere candidate + assert _candidate(profile, "cap_patch_management") == [] + # the un-proven capability stays an inferred candidate + assert _candidate(profile, "cap_supplier_management")[0].verification_status == VerificationStatus.INFERRED + + +# The four-state vocabulary exists: declared, inferred, confirmed (+unknown); only membership is asserted, not order. +def test_four_states_present(): + assert {s.value for s in VerificationStatus} == {"declared", "inferred", "confirmed", "unknown"} + + +# verification_status is a FOURTH vocabulary, disjoint from ClaimCoverage and DeltaType. +def test_verification_status_distinct_vocabulary(): + from compliance.rci.schemas import DeltaType + from compliance.reasoning.enums import ClaimCoverage + + verif = {s.value for s in VerificationStatus} + assert verif.isdisjoint({c.value for c in ClaimCoverage}) + assert verif.isdisjoint({d.value for d in DeltaType})