0a6e57ac02
2-Pass-Haiku-Klassifikation (konservativ + Re-Confirm jeder Nicht-unternehmen-Einstufung) der Review-Tier-Atome: wer muss die Pflicht erfuellen? - Migration 155: atom_classification.addressee (unternehmen/oeffentliche_stelle/aufsichtsbefugnis/staat_eu/dritter/meta), additiv, kein CHECK. [migration-approved] - Service: addressee + applicable + is_gov pro Control; include_out_of_scope-Param (Default false -> out-of-scope advisory ausgeblendet, NIE geloescht); out_of_scope_count. Pure Helper addressee_applicable/addressee_is_gov (+ Tests). - Route: optionaler include_out_of_scope-Query (contract-safe, additiv). - Frontend: GOV-Chip (additiv) + "kein Kunden-Pruefaspekt"-Chip + 1-Klick-Toggle zum Einblenden der out-of-scope-Atome. Daten: 40.859 Adressat-Tags auf macmini geladen (81% applicable, 19% advisory, 3.146 GOV). Konservativ: NULL/Unklar = applicable. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
94 lines
3.2 KiB
Python
94 lines
3.2 KiB
Python
"""Tests for the shared use-case → controls retrieval layer.

The SQL paths are verified e2e against the seeded DB; here we pin the pure,
deterministic ranking logic, the validation guard, and the small pure helpers
(tier_label, source_type, addressee_applicable, addressee_is_gov).
"""
|
|
|
|
import pytest
|
|
|
|
from compliance.domain import NotFoundError
|
|
from compliance.services.use_case_controls import (
|
|
UseCaseControlsService,
|
|
addressee_applicable,
|
|
addressee_is_gov,
|
|
relevance_score,
|
|
source_type,
|
|
tier_label,
|
|
)
|
|
|
|
# Keyword tokens handed to relevance_score as its third argument by most of the
# relevance tests in this module (presumably a network-themed vocabulary).
_NET_KW = ("firewall", "tls", "port", "segmentation", "network", "header")
|
|
|
|
|
|
def test_relevance_primary_only_baseline():
    """The primary flag alone (no confidence, no keyword hit) scores 0.5."""
    score = relevance_score("x", "y", _NET_KW, True, None)
    assert score == 0.5
|
|
|
|
|
|
def test_relevance_non_primary_baseline_is_zero():
    """With no primary flag, no confidence and no keyword hit the score is 0.0."""
    score = relevance_score("x", "y", _NET_KW, False, None)
    assert score == 0.0
|
|
|
|
|
|
def test_relevance_confidence_contributes():
    """Confidence alone contributes 0.3 * confidence to the score.

    The expected values are products of floats, so they are compared with
    ``pytest.approx`` (as the case-insensitivity test in this module already
    does) rather than with exact ``==``; the test then keeps passing if the
    order of the arithmetic inside ``relevance_score`` changes.
    """
    # non-primary, no keyword: 0.3 * confidence
    assert relevance_score("x", "y", _NET_KW, False, 1.0) == pytest.approx(0.3)
    assert relevance_score("x", "y", _NET_KW, False, 0.5) == pytest.approx(0.15)
|
|
|
|
|
|
def test_relevance_keyword_hits_are_capped_at_three():
    """Three or more distinct keyword hits saturate the content term at +0.2.

    The title below contains all six tokens of ``_NET_KW``; the score must
    still be only 0.2. The comparison uses ``pytest.approx`` because the
    expected value is the result of float arithmetic.
    """
    title = "Firewall and TLS on every port and network segmentation header"
    assert relevance_score(title, "", _NET_KW, False, None) == pytest.approx(0.2)
|
|
|
|
|
|
def test_relevance_keyword_match_is_case_insensitive_over_title_and_objective():
    """Keywords match regardless of case, across the first two text arguments."""
    # two hits → 2/3 * 0.2 ≈ 0.133
    expected = pytest.approx(0.133, abs=0.001)
    actual = relevance_score("FIREWALL", "tls config", _NET_KW, False, None)
    assert actual == expected
|
|
|
|
|
|
def test_relevance_is_clamped_to_one():
    """Primary flag, full confidence and three keyword hits yield exactly 1.0."""
    # 3 hits → +0.2
    three_hit_title = "firewall tls port"
    score = relevance_score(three_hit_title, "", _NET_KW, True, 1.0)
    assert score == 1.0
|
|
|
|
|
|
def test_relevance_no_keyword_tokens_yields_zero_content_term():
    """With an empty keyword tuple only primary and confidence contribute.

    Expected score: 0.5 (primary) + 0.3 * 1.0 (confidence) = 0.8. The value is
    a float sum, so it is compared with ``pytest.approx`` instead of ``==``.
    """
    assert relevance_score("anything", "here", (), True, 1.0) == pytest.approx(0.8)
|
|
|
|
|
|
def test_controls_for_unknown_use_case_raises_not_found():
    """An unknown use-case id is rejected with NotFoundError."""
    # db=None is fine here: the guard runs before any DB access
    service = UseCaseControlsService(db=None)
    with pytest.raises(NotFoundError):
        service.controls_for_use_case("does_not_exist")
|
|
|
|
|
|
def test_tier_label_maps_relevance_to_soft_tier():
    """tier_label maps True to "core" and False to "review"."""
    observed = {flag: tier_label(flag) for flag in (True, False)}
    assert observed == {True: "core", False: "review"}
|
|
|
|
|
|
def test_source_type_own_library_vs_derived():
    """License rule 3 maps to "own_library"; 1, 2 and None map to "derived"."""
    # license_rule 3 = self-written framework, no commercial source
    assert source_type(3) == "own_library"
    # license 1 (public domain/EU/NIST) and 2 (CC-BY) are derived from a
    # document; a missing rule (None) is expected to give the same answer
    for license_rule in (1, 2, None):
        assert source_type(license_rule) == "derived"
|
|
|
|
|
|
def test_addressee_applicable_defaults_to_true_when_unknown():
    """NULL, company and public-body addressees all count as applicable."""
    # nothing is hidden by default
    for addressee in (None, "unternehmen", "oeffentliche_stelle"):
        assert addressee_applicable(addressee) is True
|
|
|
|
|
|
def test_addressee_applicable_false_for_out_of_scope():
    """Each out-of-scope addressee tag is reported as not applicable."""
    assert addressee_applicable("aufsichtsbefugnis") is False
    assert addressee_applicable("staat_eu") is False
    assert addressee_applicable("dritter") is False
    assert addressee_applicable("meta") is False
|
|
|
|
|
|
def test_addressee_is_gov_only_for_public_body():
    """The public-body tag is flagged as gov; the company tag and None are not."""
    assert addressee_is_gov("oeffentliche_stelle") is True
    for non_gov in ("unternehmen", None):
        assert addressee_is_gov(non_gov) is False
|