# breakpilot-compliance/backend-compliance/compliance/journey_matcher/schemas.py

"""Schemas for the Journey Matcher — the Delta -> Journey function of the Capability Delta Engine.

Derived views (computed-not-stored): nothing here is persisted; every match is recomputed from the
input delta + injected journey signatures each call. No new corpus, no graph (freeze v1.0).
Python 3.9 compatible (no `|` unions).
"""

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel, Field


class JourneySignature(BaseModel):
    """A known journey described ONLY by its capability pattern (Input cluster -> Output cluster).

    Deliberately certificate-/regulation-agnostic: the match uses `capability_pattern` alone. `label`
    and the context fields (`industry`, `product_type`, `target_type`) exist for the human-auditable
    explanation, NEVER for the score. (Today the signatures are derived from the transition patterns;
    IDs like "ISO27001->CRA" are just one way to describe the clusters — the matcher never reads them.)

    `journey_id` and `label` are required. Both list fields default to a fresh empty list per
    instance (`default_factory=list`); the three context fields default to None.
    """

    # Required identifier of the journey (no default).
    journey_id: str
    # Required human-readable name; used for the explanation only, never for the score.
    label: str
    # OUTPUT cluster: the delta this journey is about. The only field the match is based on.
    capability_pattern: List[str] = Field(default_factory=list)
    # INPUT cluster: capabilities that are typically already present.
    assumed_capabilities: List[str] = Field(default_factory=list)
    # Context only (see class docstring); optional.
    industry: Optional[str] = None
    # Context only (see class docstring); optional.
    product_type: Optional[str] = None
    # Context only: regulation / certification / contract / environmental.
    target_type: Optional[str] = None


class MatchContext(BaseModel):
    """Optional corroborating context — surfaced as documented reasons, never part of the score.

    Every field is optional and defaults to None. The field names mirror the context fields of
    `JourneySignature` (`industry`, `product_type`, `target_type`).
    """

    # Industry of the caller's situation; None when unknown / not supplied.
    industry: Optional[str] = None
    # Product type of the caller's situation; None when unknown / not supplied.
    product_type: Optional[str] = None
    # Target type of the caller's situation; None when unknown / not supplied.
    # NOTE(review): presumably the same value set as JourneySignature.target_type
    # (regulation / certification / contract / environmental) — confirm against the matcher.
    target_type: Optional[str] = None


class JourneyMatchReason(BaseModel):
    """The auditable WHY behind one match — everything a reviewer needs, no opaque score.

    The first three lists partition the comparison between the input delta and a journey's
    capability pattern; the fourth carries context hints. All four default to a fresh empty list.
    """

    # delta INTERSECT pattern: the capabilities of the delta that this journey explains.
    matched_capabilities: List[str] = Field(default_factory=list)
    # delta - pattern: the capabilities of the delta that this journey does NOT explain.
    unexplained_delta: List[str] = Field(default_factory=list)
    # pattern - delta: capabilities the journey covers that are not needed here.
    journey_only: List[str] = Field(default_factory=list)
    # Human-readable context hints, e.g. "gleiche Zielart" (same target type),
    # "gleiche Branche" (same industry), ...
    context_signals: List[str] = Field(default_factory=list)


class JourneyMatch(BaseModel):
    """One known journey, ranked by how much of the delta it EXPLAINS (not how well it 'fits').

    `journey_id`, `label` and `reason` are required; `score` and `explains` have defaults.
    """

    # Required identifier of the matched journey.
    journey_id: str
    # Required human-readable name of the matched journey.
    label: str
    # |delta INTERSECT pattern| / |delta|, intended range 0..1: the share of the delta explained.
    # The range is a convention of the producer; this schema declares a plain float and does not
    # enforce it.
    score: float = 0.0
    # Human-readable form of the score, e.g. "8 von 10 fehlenden Capabilities"
    # (8 of 10 missing capabilities).
    explains: str = ""
    # Required auditable breakdown behind this match.
    reason: JourneyMatchReason


class JourneyMatchResult(BaseModel):
    """Ranked known journeys that EXPLAIN a Capability Delta. Journey = explanation, not cause.

    All fields have defaults, so an empty result can be constructed without arguments.
    """

    # NOTE(review): presumably the number of capabilities in the input delta (the |delta| used as
    # the score denominator) — confirm against the matcher.
    delta_size: int = 0
    # Matches ranked in descending order of score. The ordering is the producer's responsibility;
    # this schema does not sort or validate it.
    matches: List[JourneyMatch] = Field(default_factory=list)
    # NOTE(review): presumably the top-ranked entry of `matches`, None when there is no match —
    # confirm against the matcher.
    best: Optional[JourneyMatch] = None
    # NOTE(review): presumably a one-line human-readable summary of the result — confirm.
    headline: str = ""