"""Tests for the shared use-case → controls retrieval layer.

The SQL paths are verified e2e against the seeded DB; here we pin the pure,
deterministic ranking logic and the validation guard.

Scores are compared with ``pytest.approx`` rather than ``==`` so the tests pin
the documented weights without depending on the exact order in which the
implementation sums or rounds its floating-point terms.
"""

import pytest

from compliance.domain import NotFoundError
from compliance.services.use_case_controls import (
    UseCaseControlsService,
    relevance_score,
)

# Keyword tokens shared by the relevance tests below.
_NET_KW = ("firewall", "tls", "port", "segmentation", "network", "header")


def test_relevance_primary_only_baseline():
    """Primary flag alone (no confidence, no keyword hit) scores 0.5."""
    score = relevance_score("x", "y", _NET_KW, True, None)
    assert score == pytest.approx(0.5)


def test_relevance_non_primary_baseline_is_zero():
    """With no primary flag, confidence, or keyword hit the score is zero."""
    score = relevance_score("x", "y", _NET_KW, False, None)
    assert score == pytest.approx(0.0)


def test_relevance_confidence_contributes():
    """Non-primary, no keyword hit: the score is 0.3 * confidence."""
    full = relevance_score("x", "y", _NET_KW, False, 1.0)
    half = relevance_score("x", "y", _NET_KW, False, 0.5)
    assert full == pytest.approx(0.3)
    assert half == pytest.approx(0.15)


def test_relevance_keyword_hits_are_capped_at_three():
    """Three or more distinct keyword hits saturate the content term at +0.2."""
    title = "Firewall and TLS on every port and network segmentation header"
    score = relevance_score(title, "", _NET_KW, False, None)
    assert score == pytest.approx(0.2)


def test_relevance_keyword_match_is_case_insensitive_over_title_and_objective():
    """Keywords match regardless of case, in both the title and the objective."""
    score = relevance_score("FIREWALL", "tls config", _NET_KW, False, None)
    # two hits → 2/3 * 0.2 ≈ 0.133
    assert score == pytest.approx(0.133, abs=0.001)


def test_relevance_is_clamped_to_one():
    """Primary flag, full confidence and saturated keywords top out at 1.0."""
    title = "firewall tls port"  # 3 hits → +0.2
    score = relevance_score(title, "", _NET_KW, True, 1.0)
    assert score == pytest.approx(1.0)
    # The clamp itself: accumulated float error must never push past 1.0.
    assert score <= 1.0


def test_relevance_no_keyword_tokens_yields_zero_content_term():
    """An empty keyword tuple leaves only the primary and confidence terms."""
    score = relevance_score("anything", "here", (), True, 1.0)
    assert score == pytest.approx(0.8)


def test_controls_for_unknown_use_case_raises_not_found():
    """An unknown use-case id raises NotFoundError without touching the DB."""
    svc = UseCaseControlsService(db=None)  # guard runs before any DB access
    with pytest.raises(NotFoundError):
        svc.controls_for_use_case("does_not_exist")