"""Journey Matcher — the Delta -> Journey function of the Capability Delta Engine. Three INDEPENDENT functions now compose the pipeline, each a different problem, all interchangeable: 1. Evidence -> Capability (Company 2A) 2. Capability -> Delta (RS-005, transition_reasoning) 3. Delta -> Journey (THIS module) The paradigm shift: a Journey is no longer the CAUSE (Goal -> Journey -> Delta) but the EXPLANATION (Goal -> Required -> Delta -> Journey). The matcher does NOT look at certifications, regulations, tenders, OEM specs or the goal — it looks ONLY at the Capability Delta and asks: which known journeys describe exactly this delta? Output is a ranked, auditable explanation ("Journey A explains 82% of the delta, because 8 of 10 missing capabilities are identical, same target type, ..."). Deliberately DUMB and deterministic: pure set overlap, NO ML, NO embeddings, NO LLM. A learning ranker can be layered ON TOP later; this core stays auditable. Journey signatures are INJECTED (certificate- agnostic capability clusters), never loaded here — the engine stays hermetic. No new corpus, no graph/meta-model class (freeze v1.0). Python 3.9 compatible. Honesty: `score` is the share of the DELTA a journey explains (recall over the customer's missing capabilities), never a "fit" or a compliance verdict. `journey_only` documents where a journey reaches BEYOND this delta, so a broad journey that explains everything is not silently preferred. """ from __future__ import annotations from typing import List, Optional, Sequence from .schemas import ( JourneyMatch, JourneyMatchReason, JourneyMatchResult, JourneySignature, MatchContext, ) def _context_signals(journey: JourneySignature, context: Optional[MatchContext]) -> List[str]: """Corroborating reasons only — these are documented, they never change the score.""" if context is None: return [] signals: List[str] = [] if context.target_type and journey.target_type and context.target_type == journey.target_type: signals.append("gleiche Zielart") if context.industry and journey.industry and context.industry == journey.industry: signals.append("gleiche Branche") if context.product_type and journey.product_type and context.product_type == journey.product_type: signals.append("gleicher Produkttyp") return signals def match_journeys( delta: Sequence[str], journeys: Sequence[JourneySignature], context: Optional[MatchContext] = None, ) -> JourneyMatchResult: """Rank known journeys by the share of the Capability Delta they EXPLAIN. `delta` = the customer's MISSING capabilities (from RS-005). `journeys` = injected, certificate- agnostic signatures. score = |delta INTERSECT pattern| / |delta|. Ranking is deterministic: score desc, then context-signal count desc (corroboration only), then journey_id asc. Context never changes the score — only the documented reasons. Pure; no I/O; computed-not-stored. """ delta_set = set(delta) n = len(delta_set) matches: List[JourneyMatch] = [] for j in journeys: pattern = set(j.capability_pattern) matched = sorted(delta_set & pattern) score = (len(matched) / n) if n else 0.0 signals = _context_signals(j, context) reason = JourneyMatchReason( matched_capabilities=matched, unexplained_delta=sorted(delta_set - pattern), journey_only=sorted(pattern - delta_set), context_signals=signals, ) matches.append( JourneyMatch( journey_id=j.journey_id, label=j.label, score=round(score, 2), explains="%d von %d fehlenden Capabilities" % (len(matched), n), reason=reason, ) ) matches.sort(key=lambda m: (-m.score, -len(m.reason.context_signals), m.journey_id)) best = matches[0] if matches and matches[0].score > 0.0 else None headline = ( "%d Journeys erklaeren das Delta; beste: %s (%d%% des Deltas)" % (sum(1 for m in matches if m.score > 0.0), best.label, round(best.score * 100)) if best else "Keine bekannte Journey erklaert dieses Delta (neue Journey-Kandidatin)" ) return JourneyMatchResult(delta_size=n, matches=matches, best=best, headline=headline)