feat(profile): CanonicalProductRegulatoryProfile convergence layer (types + mappers + tests)

ONE canonical product profile so the Go gap engine and the Python reasoning
engine stop diverging ("SPS mit Remote Access" means the same everywhere).
gap.ProductProfile LEADS; the reasoning ProductProfile becomes an adapter/DTO.
Types + mappers only — no regulation logic, no Go changes, no UI, no new questions.

- CanonicalProductRegulatoryProfile mirrors gap.ProductProfile + the Navigator
  gaps the audit found: economic-operator role, radio_module, generates_usage_data,
  lifecycle_phase, structured BOM (ProductComponent), safety-vs-security split,
  machine-vs-component + a forward-looking EnvironmentalImpact domain (wastewater/
  air/chemicals triggers — fields only, no rules yet).
- Mappers: from_product_wizard (lossless), from_company_profile (prefill incl.
  the machineBuilder block), to_gap_profile (emits the unchanged gap JSON shape),
  to_reasoning_profile (projects into the reasoning ProductProfile; AI stays
  delegated to ai-act/ucca). Only profile->reasoning is coupled; reasoning stays
  hermetic.
- 10 tests = the 10 acceptance criteria incl. ProductWizard round-trip lossless,
  markets no longer forced ['EU'], and canonical->reasoning->discover_scope
  proving one semantic profile drives the engine. 33 tests green, mypy clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-26 09:52:46 +02:00
parent 6673c8052b
commit 739a477d3f
7 changed files with 622 additions and 0 deletions
@@ -0,0 +1,38 @@
"""Product profile convergence layer.
ONE canonical product profile (`CanonicalProductRegulatoryProfile`) that the Go
gap engine and the Python reasoning engine both project from — so "SPS mit
Remote Access" means the same thing everywhere. gap.ProductProfile leads; the
reasoning ProductProfile is an adapter/DTO. Types + mappers only — no regulation
logic, no UI, no new questions.
"""
from __future__ import annotations
from .canonical import (
CanonicalLifecyclePhase,
CanonicalProductRegulatoryProfile,
CanonicalProductType,
ComponentKind,
EconomicOperatorRole,
EnvironmentalImpact,
ProductComponent,
)
from .from_company_profile import from_company_profile
from .from_product_wizard import from_product_wizard
from .to_gap import to_gap_profile
from .to_reasoning import to_reasoning_profile
# Public surface of the profile package: the canonical model and its supporting
# types first, then the two inbound mappers (`from_*`) and the two outbound
# mappers (`to_*`). Everything listed here is imported above.
__all__ = [
    "CanonicalProductRegulatoryProfile",
    "CanonicalProductType",
    "EconomicOperatorRole",
    "CanonicalLifecyclePhase",
    "ComponentKind",
    "ProductComponent",
    "EnvironmentalImpact",
    "from_product_wizard",
    "from_company_profile",
    "to_gap_profile",
    "to_reasoning_profile",
]
@@ -0,0 +1,158 @@
"""CanonicalProductRegulatoryProfile — the single semantic product profile.
Convergence layer (spec 2026-06-26): instead of letting the Go `gap.ProductProfile`
and the Python reasoning `ProductProfile` drift, ONE canonical type is the source
of truth. The Go gap engine LEADS (it carries real engine logic), so the canonical
mirrors gap's field names and adds the Navigator gaps the audit found missing
(economic-operator role, radio module, generates_usage_data, lifecycle phase,
structured BOM, safety-vs-security split, machine-vs-component) plus a
forward-looking Environmental-Impact domain.
No regulation logic lives here — types only. Mappers live in sibling modules.
Python 3.9 compatible (no `|` unions).
"""
from __future__ import annotations
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
# Product category of the profiled product. The `str` mixin makes each member
# compare equal to its plain string value; that value is also what
# `to_gap_profile` writes under the "product_type" key.
class CanonicalProductType(str, Enum):  # mirrors gap.ProductType
    SOFTWARE = "software"
    HARDWARE = "hardware"
    IOT = "iot"
    SAAS = "saas"
    EXCHANGE = "exchange"
    MEDICAL_DEVICE = "medical_device"
    MACHINERY = "machinery"
    OTHER = "other"
# Role the company plays for this product.
# `to_reasoning_profile` converts a member into the reasoning `ManufacturerRole`
# by passing its string value to that enum's constructor.
# NOTE(review): that conversion raises ValueError for any value the reasoning
# enum does not define — confirm `ManufacturerRole` carries all six values.
class EconomicOperatorRole(str, Enum):  # CE/CRA role — gap.ProductProfile has none
    MANUFACTURER = "manufacturer"
    IMPORTER = "importer"
    DISTRIBUTOR = "distributor"
    INTEGRATOR = "integrator"
    OPERATOR = "operator"
    SERVICE_PROVIDER = "service_provider"
# Lifecycle phase the product is currently in.
# `to_reasoning_profile` converts a member into the reasoning
# `ProductLifecyclePhase` by string value.
# NOTE(review): confirm the reasoning enum defines the same six values,
# otherwise that conversion raises ValueError.
class CanonicalLifecyclePhase(str, Enum):
    DEVELOPMENT = "development"
    PLACING_ON_MARKET = "placing_on_market"
    OPERATION = "operation"
    MAINTENANCE = "maintenance"
    UPDATE = "update"
    END_OF_LIFE = "end_of_life"
# Coarse category of one BOM node (`ProductComponent.kind`).
# The string values are load-bearing: the reasoning mapper detects a radio
# module by comparing `kind.value` against the literal "radio_module".
class ComponentKind(str, Enum):
    MOTOR = "motor"
    PUMP = "pump"
    HEATING = "heating"
    COOLING = "cooling"
    CONTROLLER = "controller"
    PLC = "plc"
    HMI = "hmi"
    SENSOR = "sensor"
    ACTUATOR = "actuator"
    CAMERA = "camera"
    NETWORK_INTERFACE = "network_interface"
    RADIO_MODULE = "radio_module"
    CHEMICAL_DOSING = "chemical_dosing"
    WATER_INLET = "water_inlet"
    WASTEWATER_OUTLET = "wastewater_outlet"
    BATTERY = "battery"
    OTHER = "other"
class ProductComponent(BaseModel):
    """One structured BOM node — these nodes are what later trigger domains."""

    # Label of the component; the only field without a default, so it is required.
    name: str
    # Category of the node; defaults to OTHER when the caller does not classify it.
    kind: ComponentKind = ComponentKind.OTHER
    # Optional free-text remarks about the component.
    notes: Optional[str] = None
class EnvironmentalImpact(BaseModel):
    """Forward-looking environmental-media triggers (own Navigator domain).

    No regulation logic consumes these yet — profile fields only, so the model
    is not blind to wastewater/air/chemicals/waste questions when that domain
    is wired later (AbwV/WRRL/REACH/CLP/IED/BImSchG ...).
    """

    # Every flag defaults to None, i.e. "not answered"; none of the mappers
    # visible alongside this model reads or writes them.
    discharges_to_wastewater: Optional[bool] = None
    uses_cleaning_chemicals: Optional[bool] = None
    supplies_chemicals: Optional[bool] = None
    emits_to_air: Optional[bool] = None
    uses_solvents: Optional[bool] = None
    creates_waste: Optional[bool] = None
    contains_restricted_substances: Optional[bool] = None
    consumes_energy_or_water: Optional[bool] = None
    has_cooling_or_spraying_water: Optional[bool] = None
# The single semantic product profile both engines project from.
# Every field has a default, so an empty profile is valid: strings default to
# "", lists to a fresh empty list, and every boolean/enum to None ("unknown").
# The first three field groups carry the keys `to_gap_profile` emits; the
# remaining groups are canonical-only.
class CanonicalProductRegulatoryProfile(BaseModel):
    # --- identity ---
    name: str = ""
    description: str = ""
    product_type: Optional[CanonicalProductType] = None
    product_profile_id: Optional[str] = None
    tenant_id: Optional[str] = None
    iace_project_id: Optional[str] = None

    # --- gap-native lists ---
    technologies: List[str] = Field(default_factory=list)
    data_processing: List[str] = Field(default_factory=list)
    markets: List[str] = Field(default_factory=list)  # real list — never hardcoded ['EU']
    existing_certifications: List[str] = Field(default_factory=list)
    applied_norms: List[str] = Field(default_factory=list)

    # --- gap-native product / IST-state booleans (tri-state: None = unknown) ---
    # `to_gap_profile` collapses None to False when emitting these.
    connected_to_internet: Optional[bool] = None
    has_software_updates: Optional[bool] = None
    uses_ai: Optional[bool] = None
    processes_personal_data: Optional[bool] = None
    is_critical_infra_supplier: Optional[bool] = None
    has_risk_assessment: Optional[bool] = None
    has_technical_file: Optional[bool] = None
    has_operating_manual: Optional[bool] = None
    has_sbom: Optional[bool] = None
    has_vuln_management: Optional[bool] = None
    has_update_mechanism: Optional[bool] = None
    has_incident_response: Optional[bool] = None
    has_supply_chain_mgmt: Optional[bool] = None
    # Free-form strings; `to_gap_profile` emits "" when they are None.
    ce_marking_since: Optional[str] = None
    product_age: Optional[str] = None

    # --- NEW Navigator-gap fields (audit 2026-06-26) ---
    economic_operator_role: Optional[EconomicOperatorRole] = None
    has_radio_module: Optional[bool] = None
    generates_usage_data: Optional[bool] = None
    lifecycle_phase: Optional[CanonicalLifecyclePhase] = None
    components: List[ProductComponent] = Field(default_factory=list)
    has_safety_function: Optional[bool] = None
    safety_function_description: Optional[str] = None
    has_security_function: Optional[bool] = None  # safety vs security split
    has_remote_access: Optional[bool] = None
    has_embedded_software: Optional[bool] = None
    is_machine: Optional[bool] = None
    is_component: Optional[bool] = None
    is_spare_part: Optional[bool] = None

    # --- company / market context (NIS2 + scope; from company-profile) ---
    b2b_or_b2c: Optional[str] = None
    sector_industry: Optional[str] = None
    company_size: Optional[str] = None
    primary_jurisdiction: Optional[str] = None

    # --- AI context (classification stays delegated to ai-act/ucca) ---
    ai_integration_type: List[str] = Field(default_factory=list)
    human_oversight_level: Optional[str] = None

    # --- forward-looking environmental domain ---
    environmental: EnvironmentalImpact = Field(default_factory=EnvironmentalImpact)
@@ -0,0 +1,59 @@
"""company-profile -> CanonicalProductRegulatoryProfile (prefill, acceptance #2).
Pulls master data (industry, business model, size, markets) and the conditional
`machine_builder` block (camelCase JSONB keys, defined frontend-side) so the user
re-answers nothing. The machineBuilder block is the richest product/safety/
connectivity source — note it is industry-gated in the UI, so a prefill may find
it empty; that is fine (fields stay None = unknown).
"""
from __future__ import annotations
from typing import Any, Dict, List
from .canonical import CanonicalProductRegulatoryProfile
# Market codes / labels this module associates with the EU/EEA.
# NOTE(review): nothing in the visible part of this module reads this set; the
# reasoning mapper keeps its own equivalent (`_EU_HINTS`). Confirm whether it is
# still needed here.
_EU_MEMBER_HINTS = {
    "DE",
    "AT",
    "FR",
    "IT",
    "NL",
    "LU",
    "LI",
    "EU",
    "EWR",
    "EEA",
    "DACH",
}
def _markets(p: Dict[str, Any], mb: Dict[str, Any]) -> List[str]:
    """Collect the de-duplicated market list for a company profile.

    Sources are consulted in this order: ``target_markets`` (profile),
    ``exportMarkets`` (machineBuilder block), ``primary_jurisdiction`` and
    ``headquarters_country`` (profile). First-seen order is kept; empty and
    missing entries are skipped.

    Args:
        p: The company-profile dict.
        mb: The machineBuilder block of that profile (may be empty).

    Returns:
        The distinct, non-empty market entries in first-seen order.
    """
    sources = (
        p.get("target_markets"),
        mb.get("exportMarkets"),
        [p.get("primary_jurisdiction")],
        [p.get("headquarters_country")],
    )
    out: List[str] = []
    for source in sources:
        # A scalar string ("EU") is ONE market. Iterating it would split it
        # into single characters ("E", "U").
        if isinstance(source, str):
            source = [source]
        for m in source or []:
            if m and m not in out:
                out.append(m)
    return out
def _is_machine(mb: Dict[str, Any]) -> Any:
    """Return True when the machineBuilder block lists any product type, else None.

    The result is never False: an empty or missing ``productTypes`` entry means
    "unknown", not "not a machine".
    """
    return True if mb.get("productTypes") else None
def from_company_profile(profile: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Prefill a canonical profile from company master data.

    Args:
        profile: Company-profile dict with snake_case master-data keys and an
            optional ``machine_builder`` block whose keys are camelCase.

    Returns:
        A canonical profile carrying whatever the company profile already
        answers. Anything it does not answer keeps the model default
        (None / "" / empty list).
    """
    machine = profile.get("machine_builder") or {}

    # The machineBuilder answer wins. The company-level flag is consulted only
    # when the block leaves the question unanswered (None, not False).
    uses_ai = machine.get("containsAI")
    if uses_ai is None:
        uses_ai = profile.get("uses_ai")

    # Master data: empty strings are normalised to None ("unknown").
    company_context: Dict[str, Any] = {
        "sector_industry": profile.get("industry") or None,
        "b2b_or_b2c": profile.get("business_model") or None,
        "company_size": profile.get("company_size") or None,
        "primary_jurisdiction": profile.get("primary_jurisdiction") or None,
        "markets": _markets(profile, machine),
    }

    # machineBuilder facts: booleans are forwarded as-is so a missing key stays None.
    product_facts: Dict[str, Any] = {
        "description": machine.get("productDescription") or "",
        "uses_ai": uses_ai,
        "ai_integration_type": list(machine.get("aiIntegrationType") or []),
        "human_oversight_level": machine.get("humanOversightLevel") or None,
        "has_embedded_software": machine.get("containsFirmware"),
        "has_safety_function": machine.get("hasSafetyFunction"),
        "safety_function_description": machine.get("safetyFunctionDescription") or None,
        "has_remote_access": machine.get("hasRemoteAccess"),
        "connected_to_internet": machine.get("isNetworked"),
        "has_software_updates": machine.get("hasOTAUpdates"),
        "has_risk_assessment": machine.get("hasRiskAssessment"),
        "is_machine": _is_machine(machine),
        "is_critical_infra_supplier": machine.get("criticalSectorClients"),
    }

    return CanonicalProductRegulatoryProfile(**company_context, **product_facts)
@@ -0,0 +1,50 @@
"""ProductWizard payload -> CanonicalProductRegulatoryProfile (lossless).
The gap-analysis ProductWizard POSTs exactly the gap.ProductProfile JSON shape
(see admin-compliance/.../ProductWizard.tsx handleSubmit). This mapper copies
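every gap field verbatim so that `to_gap_profile(from_product_wizard(p))`
reproduces the gap subset of `p` byte-for-byte (acceptance #1) whenever `p`
carries every gap key with a valid value; keys that are absent or null come back
as the gap zero values (False / "" / []), and an unrecognised `product_type`
comes back as "". New Navigator fields the wizard does not ask stay None.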
"""
from __future__ import annotations
from typing import Any, Dict, Optional
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
def _as_product_type(value: Any) -> Optional[CanonicalProductType]:
    """Coerce a raw wizard value into the product-type enum.

    Returns None for any value the enum rejects (including a missing value),
    so an unknown product type never fails the whole mapping.
    """
    parsed: Optional[CanonicalProductType] = None
    try:
        parsed = CanonicalProductType(value)
    except ValueError:
        # Deliberate: an unrecognised type is recorded as "unknown".
        pass
    return parsed
def from_product_wizard(payload: Dict[str, Any]) -> CanonicalProductRegulatoryProfile:
    """Map a ProductWizard POST body onto the canonical profile.

    Args:
        payload: Dict in the gap.ProductProfile JSON shape. Any key may be
            absent or explicitly null.

    Returns:
        A canonical profile with every gap field copied over. Fields the wizard
        does not ask keep their model defaults.
    """
    g = payload.get
    return CanonicalProductRegulatoryProfile(
        # `or ""` rather than a .get default: a default only covers a MISSING
        # key, while an explicit JSON null would still reach these required
        # string fields. The list fields below are guarded the same way.
        name=g("name") or "",
        description=g("description") or "",
        product_type=_as_product_type(g("product_type")),
        technologies=list(g("technologies") or []),
        data_processing=list(g("data_processing") or []),
        markets=list(g("markets") or []),
        existing_certifications=list(g("existing_certifications") or []),
        applied_norms=list(g("applied_norms") or []),
        # Booleans are forwarded untouched so a missing answer stays None.
        connected_to_internet=g("connected_to_internet"),
        has_software_updates=g("has_software_updates"),
        uses_ai=g("uses_ai"),
        processes_personal_data=g("processes_personal_data"),
        is_critical_infra_supplier=g("is_critical_infra_supplier"),
        has_risk_assessment=g("has_risk_assessment"),
        has_technical_file=g("has_technical_file"),
        has_operating_manual=g("has_operating_manual"),
        has_sbom=g("has_sbom"),
        has_vuln_management=g("has_vuln_management"),
        has_update_mechanism=g("has_update_mechanism"),
        has_incident_response=g("has_incident_response"),
        has_supply_chain_mgmt=g("has_supply_chain_mgmt"),
        ce_marking_since=g("ce_marking_since"),
        product_age=g("product_age"),
    )
@@ -0,0 +1,41 @@
"""CanonicalProductRegulatoryProfile -> gap.ProductProfile JSON shape.
Emits exactly the keys the Go gap engine already consumes (gap/models.go json
tags), so the gap engine runs UNCHANGED — the canonical is a superset and gap is
its projection: known values pass through verbatim, while a tri-state `None`
(unknown) collapses to the gap zero value (False / ""). Canonical-only fields
(role/radio/components/...) are intentionally not emitted here; they reach the
reasoning side via to_reasoning.
"""
from __future__ import annotations
from typing import Any, Dict
from .canonical import CanonicalProductRegulatoryProfile
def to_gap_profile(c: CanonicalProductRegulatoryProfile) -> Dict[str, Any]:
    """Project the canonical profile onto the gap.ProductProfile JSON shape.

    Args:
        c: The canonical profile to project.

    Returns:
        A new dict with the gap keys in their fixed order. Lists are copied,
        tri-state booleans are collapsed with ``bool()`` (None becomes False),
        and a missing product type or date string is emitted as "".
    """
    gap: Dict[str, Any] = {
        "name": c.name,
        "description": c.description,
        "product_type": c.product_type.value if c.product_type else "",
    }

    # List fields: hand out copies so the caller cannot mutate the profile.
    list_fields = (
        ("technologies", c.technologies),
        ("data_processing", c.data_processing),
        ("markets", c.markets),
        ("existing_certifications", c.existing_certifications),
        ("applied_norms", c.applied_norms),
    )
    for key, items in list_fields:
        gap[key] = list(items)

    # Tri-state flags: the gap shape only knows true/false, so None -> False.
    flag_fields = (
        ("connected_to_internet", c.connected_to_internet),
        ("has_software_updates", c.has_software_updates),
        ("uses_ai", c.uses_ai),
        ("processes_personal_data", c.processes_personal_data),
        ("is_critical_infra_supplier", c.is_critical_infra_supplier),
        ("has_risk_assessment", c.has_risk_assessment),
        ("has_technical_file", c.has_technical_file),
        ("has_operating_manual", c.has_operating_manual),
        ("has_sbom", c.has_sbom),
        ("has_vuln_management", c.has_vuln_management),
        ("has_update_mechanism", c.has_update_mechanism),
        ("has_incident_response", c.has_incident_response),
        ("has_supply_chain_mgmt", c.has_supply_chain_mgmt),
    )
    for key, flag in flag_fields:
        gap[key] = bool(flag)

    # Optional strings: only None is replaced, any other value passes through.
    text_fields = (
        ("ce_marking_since", c.ce_marking_since),
        ("product_age", c.product_age),
    )
    for key, text in text_fields:
        gap[key] = "" if text is None else text

    return gap
@@ -0,0 +1,88 @@
"""CanonicalProductRegulatoryProfile -> reasoning ProductProfile (adapter/DTO).
The reasoning engine stays the consumer, never the source of truth (spec): the
canonical leads, this projects it into the Python reasoning ProductProfile so the
Reasoning engine and the Go gap engine run off ONE semantic profile (acceptance
#10). AI classification is NOT done here — only `uses_ai` is forwarded; risk
classification stays delegated to ai-act/ucca (acceptance #3).
This is the ONLY one-way coupling profile -> reasoning; reasoning never imports
profile, so the reasoning layer stays hermetic.
"""
from __future__ import annotations
from typing import List, Optional
from compliance.reasoning.enums import ManufacturerRole, MarketModel, ProductLifecyclePhase
from compliance.reasoning.schemas import ProductProfile
from .canonical import CanonicalProductRegulatoryProfile, CanonicalProductType
# Product types that, on their own, count as a "has software" signal.
_SOFTWARE_TYPES = {
    CanonicalProductType.SOFTWARE,
    CanonicalProductType.SAAS,
    CanonicalProductType.IOT,
}
# Technology tags that, on their own, count as a "has software" signal.
_SOFTWARE_TECH = {
    "ai",
    "api",
    "database",
    "encryption",
    "ota_updates",
    "cloud",
    "blockchain",
}
# Market codes / labels that mean "placed on the EU/EEA market".
# A non-empty market list with no entry from this set is reported as
# eu_market=False, so the set has to be complete. It holds all 27 EU member
# states (ISO 3166-1 alpha-2), the EEA EFTA states, and the aggregate labels.
_EU_HINTS = {
    # aggregate labels
    "EU", "EWR", "EEA", "DACH",
    # EU-27
    "AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "ES",
    "FI", "FR", "GR", "HR", "HU", "IE", "IT", "LT", "LU",
    "LV", "MT", "NL", "PL", "PT", "RO", "SE", "SI", "SK",
    # EEA EFTA states
    "IS", "LI", "NO",
}
# Company-profile business-model strings -> reasoning MarketModel.
# Lookup is by exact key; a string not listed here maps to None at the call site.
_B2X = {
    "B2B": MarketModel.B2B,
    "B2C": MarketModel.B2C,
    "B2B_B2C": MarketModel.BOTH,
    "B2B2C": MarketModel.BOTH,
}
def _or_none(*values: Optional[bool]) -> Optional[bool]:
    """Fold tri-state flags into one tri-state answer.

    Returns True if any value is exactly True, None if every value is None
    (or no values were passed), and False otherwise.
    """
    every_unknown = True
    for flag in values:
        if flag is True:
            return True
        if flag is not None:
            every_unknown = False
    return None if every_unknown else False
def _has_software(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Derive the reasoning ``has_software`` flag from every available signal.

    Explicit answers (embedded software, software updates, AI use) are combined
    with two derived signals: a software-like product type and a software-like
    technology tag.
    """
    signals: List[Optional[bool]] = [
        c.has_embedded_software,
        c.has_software_updates,
        c.uses_ai,
    ]
    # Derived signals can only say "yes"; absence of evidence stays None and
    # never turns the result into False.
    signals.append(True if c.product_type in _SOFTWARE_TYPES else None)
    signals.append(True if set(c.technologies) & _SOFTWARE_TECH else None)
    return _or_none(*signals)
def _eu_market(markets: List[str]) -> Optional[bool]:
    """Tell whether any listed market is an EU/EEA market.

    Args:
        markets: Market codes or labels as stored on the canonical profile.

    Returns:
        None when no markets are given (unknown), True when at least one entry
        matches ``_EU_HINTS``, False otherwise.
    """
    if not markets:
        return None
    # Compare trimmed and upper-cased: "de" or " DE" must not be misread as a
    # non-EU market, because False is an affirmative "not sold in the EU".
    normalized = {m.strip().upper() for m in markets if isinstance(m, str)}
    return bool(normalized & _EU_HINTS)
def _has_radio(c: CanonicalProductRegulatoryProfile) -> Optional[bool]:
    """Resolve the radio-module flag for the reasoning profile.

    An explicit ``has_radio_module`` answer (True or False) always wins.
    Without one, a BOM component of kind "radio_module" yields True; otherwise
    the answer stays None (unknown).
    """
    explicit = c.has_radio_module
    if explicit is not None:
        return explicit
    for comp in c.components:
        if comp.kind.value == "radio_module":
            return True
    return None
def to_reasoning_profile(c: CanonicalProductRegulatoryProfile) -> ProductProfile:
    """Project the canonical profile into the reasoning ``ProductProfile``.

    Explicit canonical answers are forwarded unchanged. Where the canonical
    profile is silent (None), a few fields are inferred from weaker evidence,
    and that inference can only produce True, never False.

    Args:
        c: The canonical profile to project.

    Returns:
        The reasoning-side product profile.
    """
    # Enum hand-over is by string value.
    # NOTE(review): raises ValueError if the reasoning enum lacks a value —
    # confirm both enums stay aligned.
    role: Optional[ManufacturerRole] = None
    if c.economic_operator_role:
        role = ManufacturerRole(c.economic_operator_role.value)

    phase: Optional[ProductLifecyclePhase] = None
    if c.lifecycle_phase:
        phase = ProductLifecyclePhase(c.lifecycle_phase.value)

    # Business-model strings outside the lookup table map to None.
    market_model = _B2X.get(c.b2b_or_b2c) if c.b2b_or_b2c else None

    # No explicit machine answer: a MACHINERY product type implies a machine.
    is_machine = c.is_machine
    if is_machine is None and c.product_type == CanonicalProductType.MACHINERY:
        is_machine = True

    # No explicit usage-data answer: a "telemetry" entry implies usage data.
    generates_data = c.generates_usage_data
    if generates_data is None and "telemetry" in c.data_processing:
        generates_data = True

    product_types = [c.product_type.value] if c.product_type else []
    # True when "cloud" is among the technologies, otherwise unknown (None).
    cloud_connection = ("cloud" in c.technologies) or None

    return ProductProfile(
        product_name=c.name or "Produkt",
        product_profile_id=c.product_profile_id,
        manufacturer_role=role,
        product_type=product_types,
        has_software=_has_software(c),
        has_embedded_software=c.has_embedded_software,
        has_remote_access=c.has_remote_access,
        has_cloud_connection=cloud_connection,
        has_ai_functionality=c.uses_ai,
        has_radio_module=_has_radio(c),
        has_safety_function=c.has_safety_function,
        generates_usage_data=generates_data,
        is_machine=is_machine,
        is_component=c.is_component,
        is_spare_part=c.is_spare_part,
        eu_market=_eu_market(c.markets),
        b2b_or_b2c=market_model,
        lifecycle_phase=phase,
        company_size=c.company_size,
        sector=c.sector_industry,
    )