feat: Journey Matcher — the delta explains the journey (Delta -> Journey, ADR-011)

The sanctioned last architectural building block. Reverses the order: not Goal -> Journey -> Delta but Goal -> Required -> Delta -> Journey. A Journey is the EXPLANATION of the Capability Delta, not its cause — so this is a Matcher/Explainer, not a Selector.

New module compliance/journey_matcher/ = the third independent, interchangeable function of the pipeline, beside Company 2A (Evidence -> Capability) and RS-005 (Capability -> Delta):

    match_journeys(delta, journeys, context) -> ranked, auditable explanation

- Looks ONLY at the Capability Delta — never at certificates, regulation, tenders or the goal. Journey signatures are certificate-agnostic capability clusters (Input -> Output pattern).
- score = share of the delta a journey explains (recall over the missing capabilities); journey_only documents where a journey reaches beyond the delta so a broad journey is not silently preferred.
- Deliberately dumb + deterministic (pure set overlap; NO ML/embeddings/LLM), fully auditable (matched / unexplained / journey_only / context signals); a learning ranker can sit on top later.
- Signatures injected, engine hermetic. mypy --strict clean.

Validated on the real patterns (demo): a CRA+MaschinenVO delta ranks the convergence journey 100%, "ISO27001 -> CRA" 56% (misses the machine-safety caps), "ISMS -> TISAX" 0%. This resolves the "Scope -> Journey" jump from Customer Mission #1.

Freeze exception explicitly authorised; non-runtime -> no deploy. 12 tests pass, check-loc 0.
2026-06-28 10:36:43 +02:00
parent 3c6e2a2acc
commit 80bf1993e0
8 changed files with 511 additions and 0 deletions
@@ -0,0 +1,80 @@
+"""Unit tests for the Journey Matcher (Delta -> Journey).
+
+The matcher ranks known journeys by the share of the Capability Delta they EXPLAIN, using ONLY the
+delta and injected capability-cluster signatures — deterministic, auditable, no ML. These tests pin
+the score semantics (recall over the delta), the ranking order, the audit reasons, and that context
+corroborates without ever changing the score.
+"""
+
+from __future__ import annotations
+
+from compliance.journey_matcher import (
+    JourneySignature,
+    MatchContext,
+    match_journeys,
+)
+
+
+def _sig(jid, pattern, **kw):  # test shorthand: the label simply mirrors the journey id
+    return JourneySignature(capability_pattern=pattern, journey_id=jid, label=jid, **kw)
+
+
+def test_score_is_share_of_delta_explained():
+    """A journey covering 4 of the 5 delta capabilities scores 0.8 and says so in `explains`."""
+    partial = _sig("J", ["a", "b", "c", "d"])
+    top = match_journeys(["a", "b", "c", "d", "e"], [partial]).matches[0]
+    assert top.score == 0.8
+    assert top.explains == "4 von 5 fehlenden Capabilities"
+
+
+def test_ranking_orders_by_explanatory_power():
+    """Matches come back ordered by covered share of the delta, and `best` is the top one."""
+    # Deliberately passed in out of order; coverage of the 4-item delta is 1/4, 3/4 and 2/4.
+    low = _sig("low", ["a"])
+    high = _sig("high", ["a", "b", "c"])
+    mid = _sig("mid", ["a", "b"])
+    outcome = match_journeys(["a", "b", "c", "d"], [low, high, mid])
+    ranked_ids = [entry.journey_id for entry in outcome.matches]
+    assert ranked_ids == ["high", "mid", "low"]
+    assert outcome.best.journey_id == "high"
+
+
+def test_audit_reason_partitions_the_delta():
+    """The reason lists what is matched (b, c), left unexplained (a) and journey-only (x, y)."""
+    overlapping = _sig("J", ["b", "c", "x", "y"])
+    audit = match_journeys(["a", "b", "c"], [overlapping]).matches[0].reason
+    assert audit.matched_capabilities == ["b", "c"]
+    assert audit.unexplained_delta == ["a"]
+    assert audit.journey_only == ["x", "y"]
+
+
+def test_context_corroborates_but_never_changes_score():
+    """Both journeys score 1.0; the one sharing the context's target type is listed first."""
+    same = _sig("same", ["a", "b"], target_type="regulation")
+    other = _sig("other", ["a", "b"], target_type="contract")
+    wanted = MatchContext(target_type="regulation")
+    ranked = match_journeys(["a", "b"], [other, same], wanted).matches
+    first, second = ranked[0], ranked[1]
+    assert (first.score, second.score) == (1.0, 1.0)
+    assert first.journey_id == "same"
+    assert "gleiche Zielart" in first.reason.context_signals
+    assert second.reason.context_signals == []
+
+
+def test_deterministic_tiebreak_by_journey_id():
+    """Equal coverage (1 of 2 each): results are ordered by journey id, 'alpha' before 'zeta'."""
+    tied = [_sig("zeta", ["a"]), _sig("alpha", ["a"])]
+    assert [m.journey_id for m in match_journeys(["a", "b"], tied).matches] == ["alpha", "zeta"]
+
+
+def test_no_journey_explains_the_delta():
+    """A journey sharing nothing with the delta scores 0.0 and is not reported as best."""
+    outcome = match_journeys(["a", "b"], [_sig("J", ["x", "y"])])
+    assert outcome.best is None and outcome.matches[0].score == 0.0
+    assert "neue Journey-Kandidatin" in outcome.headline
+
+
+def test_empty_delta_yields_no_best():
+    """With an empty delta the result reports size 0 and no best journey."""
+    outcome = match_journeys([], [_sig("J", ["a"])])
+    assert outcome.delta_size == 0 and outcome.best is None
@@ -0,0 +1,51 @@
+"""Journey Matcher demo test — Delta -> Journey on the real transition patterns.
+
+Pins that the matcher, given ONLY a real Capability Delta (a multi-cert company wanting CRA +
+MaschinenVO), correctly ranks the known journeys by explanatory power: the convergence journey
+explains the whole delta, the CRA-only journey explains the security part but misses the machine-
+safety capabilities, and the TISAX journey is irrelevant. End-to-end through the real engines.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+
+
+def _run():
+    """Run the demo one directory above this file with UTF-8 forced on both ends; return stdout."""
+    env = {**os.environ, "PYTHONPATH": ".", "PYTHONIOENCODING": "utf-8"}  # child prints non-ASCII
+    r = subprocess.run([sys.executable, "reference_scenarios/journey_matcher_demo.py"], env=env,
+                       cwd=os.path.join(os.path.dirname(__file__), ".."),
+                       capture_output=True, encoding="utf-8")
+    assert r.returncode == 0, r.stderr
+    return r.stdout
+
+
+def test_runs_end_to_end():
+    """The demo output contains its title and the Goal → Required → Delta → Journey chain."""
+    report = _run()
+    assert all(s in report for s in ("Journey Matcher", "Goal → Required → Delta → Journey"))
+
+
+def test_convergence_journey_explains_the_whole_delta():
+    """The convergence journey row reports 9 of 9 missing capabilities, i.e. 100%."""
+    assert "**ISO27001 -> CRA + MaschinenVO** | 9 von 9 fehlenden Capabilities | 100% |" in _run()
+
+
+def test_partial_journey_misses_machine_safety():
+    """The CRA-only journey covers the security part (5 of 9, 56%) but not MaschinenVO."""
+    expected_row = "**ISO27001 -> CRA** | 5 von 9 fehlenden Capabilities | 56% |"
+    assert expected_row in _run()
+
+
+def test_irrelevant_journey_scores_zero():
+    """The TISAX journey row reports 0 of 9 missing capabilities, i.e. 0%."""
+    assert "**ISMS -> TISAX** | 0 von 9 fehlenden Capabilities | 0% |" in _run()
+
+
+def test_match_is_auditable():
+    """The demo output contains both the audit disclaimer and the 'Erklärte Capabilities' text."""
+    rendered = _run()
+    assert "auditierbar, keine Blackbox" in rendered and "Erklärte Capabilities" in rendered