# breakpilot-compliance/backend-compliance/tests/test_journey_matcher.py

"""Unit tests for the Journey Matcher (Delta -> Journey).

The matcher ranks known journeys by the share of the Capability Delta they EXPLAIN, using ONLY the
delta and injected capability-cluster signatures — deterministic, auditable, no ML. These tests pin
the score semantics (recall over the delta), the ranking order, the audit reasons, and that context
corroborates without ever changing the score.
"""

from __future__ import annotations

from compliance.journey_matcher import (
    JourneySignature,
    MatchContext,
    match_journeys,
)


def _sig(jid, pattern, **kw):
    """Build a ``JourneySignature`` for tests, reusing the id as its label.

    Any extra keyword arguments are forwarded unchanged to the constructor.
    """
    fixed_fields = {
        "journey_id": jid,
        "label": jid,
        "capability_pattern": pattern,
    }
    return JourneySignature(**fixed_fields, **kw)


def test_score_is_share_of_delta_explained():
    """A journey covering four of five delta capabilities scores 0.8."""
    missing = ["a", "b", "c", "d", "e"]
    # The pattern covers every delta capability except "e".
    signature = _sig("J", ["a", "b", "c", "d"])
    top = match_journeys(missing, [signature]).matches[0]
    assert top.score == 0.8
    assert top.explains == "4 von 5 fehlenden Capabilities"


def test_ranking_orders_by_explanatory_power():
    """Matches are returned from the largest to the smallest share explained."""
    missing = ["a", "b", "c", "d"]
    # Supplied deliberately out of order: 1/4, 3/4 and 2/4 of the delta.
    low = _sig("low", ["a"])
    high = _sig("high", ["a", "b", "c"])
    mid = _sig("mid", ["a", "b"])
    result = match_journeys(missing, [low, high, mid])
    ranked_ids = [match.journey_id for match in result.matches]
    assert ranked_ids == ["high", "mid", "low"]
    assert result.best.journey_id == "high"


def test_audit_reason_partitions_the_delta():
    """The audit reason lists matched, unexplained and journey-only capabilities."""
    missing = ["a", "b", "c"]
    # The pattern shares b and c with the delta, lacks a, and adds x and y.
    signature = _sig("J", ["b", "c", "x", "y"])
    result = match_journeys(missing, [signature])
    reason = result.matches[0].reason
    assert reason.matched_capabilities == ["b", "c"]
    assert reason.unexplained_delta == ["a"]
    assert reason.journey_only == ["x", "y"]


def test_context_corroborates_but_never_changes_score():
    """A matching target type adds a context signal and breaks ties only.

    Both journeys explain the whole delta, so both must score 1.0; the one
    whose target type equals the context's is expected to be ranked first.
    """
    missing = ["a", "b"]
    same = _sig("same", ["a", "b"], target_type="regulation")
    other = _sig("other", ["a", "b"], target_type="contract")
    context = MatchContext(target_type="regulation")
    # Pass the non-corroborated journey first so input order cannot explain the ranking.
    result = match_journeys(missing, [other, same], context)
    first = result.matches[0]
    second = result.matches[1]
    assert first.score == second.score == 1.0
    assert first.journey_id == "same"
    assert "gleiche Zielart" in first.reason.context_signals
    assert second.reason.context_signals == []


def test_deterministic_tiebreak_by_journey_id():
    """Journeys with equal scores are listed by ascending journey id."""
    missing = ["a", "b"]
    # Same pattern for both, passed in reverse alphabetical order.
    candidates = [_sig("zeta", ["a"]), _sig("alpha", ["a"])]
    result = match_journeys(missing, candidates)
    ordered_ids = [match.journey_id for match in result.matches]
    assert ordered_ids == ["alpha", "zeta"]


def test_no_journey_explains_the_delta():
    """With no overlap there is no best match and the headline flags a new candidate."""
    unrelated = _sig("J", ["x", "y"])
    result = match_journeys(["a", "b"], [unrelated])
    assert result.best is None
    # The journey is still listed, just with a zero score.
    assert result.matches[0].score == 0.0
    assert "neue Journey-Kandidatin" in result.headline


def test_empty_delta_yields_no_best():
    """An empty delta reports size zero and no best journey."""
    candidates = [_sig("J", ["a"])]
    result = match_journeys([], candidates)
    assert result.delta_size == 0
    assert result.best is None